summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-07-17 18:11:32 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-24 17:19:31 +0200
commitaad6ba018a17eded2b3f4af2212e0123cfb29b79 (patch)
tree4fd0376511b794c001ba0fd1675a940a764bb728 /video
parenteb54d2ad4d46b6c1f91564604fad05f092772e84 (diff)
downloadmpv-aad6ba018a17eded2b3f4af2212e0123cfb29b79.tar.bz2
mpv-aad6ba018a17eded2b3f4af2212e0123cfb29b79.tar.xz
vo_opengl: support compute shaders
These can either be invoked as dispatch_compute to do a single computation, or finish_pass_fbo (after setting compute_size_minimum) to render to a new texture using a compute shader. To make this stuff all work transparently, we try really, really hard to make compute shaders as identical to fragment shaders as possible in their behavior.
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/common.c19
-rw-r--r--video/out/opengl/common.h5
-rw-r--r--video/out/opengl/context.c1
-rw-r--r--video/out/opengl/gl_headers.h6
-rw-r--r--video/out/opengl/utils.c286
-rw-r--r--video/out/opengl/utils.h7
-rw-r--r--video/out/opengl/video.c93
7 files changed, 317 insertions, 100 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index c7eee414ac..b9536b6c59 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -335,6 +335,23 @@ static const struct gl_functions gl_functions[] = {
{0}
},
},
+ {
+ .ver_core = 420,
+ .extension = "GL_ARB_shader_image_load_store",
+ .functions = (const struct gl_function[]) {
+ DEF_FN(BindImageTexture),
+ DEF_FN(MemoryBarrier),
+ {0}
+ },
+ },
+ {
+ .ver_core = 430,
+ .extension = "GL_ARB_compute_shader",
+ .functions = (const struct gl_function[]) {
+ DEF_FN(DispatchCompute),
+ {0},
+ },
+ },
// Swap control, always an OS specific extension
// The OSX code loads this manually.
{
@@ -589,7 +606,7 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor;
// restrict GLSL version to be forwards compatible
- gl->glsl_version = MPMIN(gl->glsl_version, 400);
+ gl->glsl_version = MPMIN(gl->glsl_version, 430);
}
if (is_software_gl(gl)) {
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index 7842c5a910..40208c45e5 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -163,6 +163,11 @@ struct GL {
void *);
void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei);
+ void (GLAPIENTRY *DispatchCompute)(GLuint, GLuint, GLuint);
+ void (GLAPIENTRY *BindImageTexture)(GLuint, GLuint, GLint, GLboolean,
+ GLint, GLenum, GLenum);
+ void (GLAPIENTRY *MemoryBarrier)(GLbitfield);
+
const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint);
void (GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *);
void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint);
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index ab98eddbf9..fe454e9741 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -93,6 +93,7 @@ static const struct mpgl_driver *const backends[] = {
// initialize. The first entry is the most preferred version.
const int mpgl_preferred_gl_versions[] = {
440,
+ 430,
400,
330,
320,
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index 74a4947137..8f201bb64c 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -48,7 +48,9 @@
// --- GL 1.5
+#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9
+#define GL_READ_WRITE 0x88BA
// --- GL 3.0
@@ -77,6 +79,10 @@
#define GL_DYNAMIC_STORAGE_BIT 0x0100
#define GL_CLIENT_STORAGE_BIT 0x0200
+// -- GL 4.3 or GL_ARB_compute_shader
+
+#define GL_COMPUTE_SHADER 0x91B9
+
// --- GL_NV_vdpau_interop
#define GLvdpauSurfaceNV GLintptr
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 2624ad7715..f1e0081b10 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -265,8 +265,11 @@ bool fbotex_init(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
// Like fbotex_init(), except it can be called on an already initialized FBO;
// and if the parameters are the same as the previous call, do not touch it.
-// flags can be 0, or a combination of FBOTEX_FUZZY_W and FBOTEX_FUZZY_H.
+// flags can be 0, or a combination of FBOTEX_FUZZY_W, FBOTEX_FUZZY_H and
+// FBOTEX_COMPUTE.
// Enabling FUZZY for W or H means the w or h does not need to be exact.
+// FBOTEX_COMPUTE means that the texture will be written to by a compute shader
+// instead of actually being attached to an FBO.
bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
GLenum iformat, int flags)
{
@@ -315,7 +318,6 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
.iformat = iformat,
};
- gl->GenFramebuffers(1, &fbo->fbo);
gl->GenTextures(1, &fbo->texture);
gl->BindTexture(GL_TEXTURE_2D, fbo->texture);
gl->TexImage2D(GL_TEXTURE_2D, 0, format->internal_format, fbo->rw, fbo->rh, 0,
@@ -328,20 +330,23 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
gl_check_error(gl, log, "after creating framebuffer texture");
- gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo);
- gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
- GL_TEXTURE_2D, fbo->texture, 0);
-
- GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER);
- if (err != GL_FRAMEBUFFER_COMPLETE) {
- mp_err(log, "Error: framebuffer completeness check failed (error=%d).\n",
- (int)err);
- res = false;
- }
-
- gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+ bool skip_fbo = flags & FBOTEX_COMPUTE;
+ if (!skip_fbo) {
+ gl->GenFramebuffers(1, &fbo->fbo);
+ gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo);
+ gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_TEXTURE_2D, fbo->texture, 0);
+
+ GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER);
+ if (err != GL_FRAMEBUFFER_COMPLETE) {
+ mp_err(log, "Error: framebuffer completeness check failed (error=%d).\n",
+ (int)err);
+ res = false;
+ }
- gl_check_error(gl, log, "after creating framebuffer");
+ gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+ gl_check_error(gl, log, "after creating framebuffer");
+ }
return res;
}
@@ -462,6 +467,10 @@ struct sc_uniform {
// Set for sampler uniforms.
GLenum tex_target;
GLuint tex_handle;
+ // Set for image uniforms
+ GLuint img_handle;
+ GLenum img_access;
+ GLenum img_iformat;
};
struct sc_cached_uniform {
@@ -475,6 +484,7 @@ struct sc_entry {
int num_uniforms;
bstr frag;
bstr vert;
+ bstr comp;
struct gl_timer *timer;
struct gl_vao vao;
};
@@ -492,6 +502,7 @@ struct gl_shader_cache {
bstr header_text;
bstr text;
int next_texture_unit;
+ int next_image_unit;
struct gl_vao *vao; // deprecated
struct sc_entry *entries;
@@ -545,6 +556,10 @@ void gl_sc_reset(struct gl_shader_cache *sc)
gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
gl->BindTexture(u->tex_target, 0);
}
+ if (u->type == UT_i && u->img_access) {
+ gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0,
+ u->img_access, u->img_iformat);
+ }
}
gl->ActiveTexture(GL_TEXTURE0);
}
@@ -556,6 +571,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
talloc_free(sc->uniforms[n].name);
sc->num_uniforms = 0;
sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
+ sc->next_image_unit = 1;
sc->vertex_entries = NULL;
sc->vertex_size = 0;
sc->current_shader = NULL;
@@ -571,6 +587,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
sc->gl->DeleteProgram(e->gl_shader);
talloc_free(e->vert.start);
talloc_free(e->frag.start);
+ talloc_free(e->comp.start);
talloc_free(e->uniforms);
gl_timer_free(e->timer);
gl_vao_uninit(&e->vao);
@@ -639,6 +656,14 @@ void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
bstr_xappend(sc, &sc->header_text, text);
}
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+ va_list ap;
+ va_start(ap, textf);
+ bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap);
+ va_end(ap);
+}
+
static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
const char *name)
{
@@ -690,6 +715,29 @@ void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture
u->tex_handle = texture;
}
+static const char *mp_image2D_type(GLenum access)
+{
+ switch (access) {
+ case GL_WRITE_ONLY: return "writeonly image2D";
+ case GL_READ_ONLY: return "readonly image2D";
+ case GL_READ_WRITE: return "image2D";
+ default: abort();
+ }
+}
+
+void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture,
+ GLuint iformat, GLenum access)
+{
+ struct sc_uniform *u = find_uniform(sc, name);
+ u->type = UT_i;
+ u->size = 1;
+ u->glsl_type = mp_image2D_type(access);
+ u->v.i[0] = sc->next_image_unit++;
+ u->img_handle = texture;
+ u->img_access = access;
+ u->img_iformat = iformat;
+}
+
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
{
struct sc_uniform *u = find_uniform(sc, name);
@@ -809,6 +857,10 @@ static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int
gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
gl->BindTexture(u->tex_target, u->tex_handle);
}
+ if (u->img_handle) {
+ gl->BindImageTexture(u->v.i[0], u->img_handle, 0, GL_FALSE, 0,
+ u->img_access, u->img_iformat);
+ }
break;
case UT_f:
if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) {
@@ -846,6 +898,16 @@ void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global,
sc->global = global;
}
+static const char *shader_typestr(GLenum type)
+{
+ switch (type) {
+ case GL_VERTEX_SHADER: return "vertex";
+ case GL_FRAGMENT_SHADER: return "fragment";
+ case GL_COMPUTE_SHADER: return "compute";
+ default: abort();
+ }
+}
+
static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program,
GLenum type, const char *source)
{
@@ -860,7 +922,7 @@ static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program,
gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
- const char *typestr = type == GL_VERTEX_SHADER ? "vertex" : "fragment";
+ const char *typestr = shader_typestr(type);
if (mp_msg_test(sc->log, pri)) {
MP_MSG(sc, pri, "%s shader source:\n", typestr);
mp_log_source(sc->log, pri, source);
@@ -911,23 +973,28 @@ static void link_shader(struct gl_shader_cache *sc, GLuint program)
sc->error_state = true;
}
-static GLuint compile_program(struct gl_shader_cache *sc, const char *vertex,
- const char *frag)
+// either 'compute' or both 'vertex' and 'frag' are needed
+static GLuint compile_program(struct gl_shader_cache *sc, struct bstr *vertex,
+ struct bstr *frag, struct bstr *compute)
{
GL *gl = sc->gl;
GLuint prog = gl->CreateProgram();
- compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex);
- compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag);
- for (int n = 0; sc->vertex_entries[n].name; n++) {
- char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name);
- gl->BindAttribLocation(prog, n, vname);
+ if (compute)
+ compile_attach_shader(sc, prog, GL_COMPUTE_SHADER, compute->start);
+ if (vertex && frag) {
+ compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex->start);
+ compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag->start);
+ for (int n = 0; sc->vertex_entries[n].name; n++) {
+ char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name);
+ gl->BindAttribLocation(prog, n, vname);
+ }
}
link_shader(sc, prog);
return prog;
}
-static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
- const char *frag)
+static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex,
+ struct bstr *frag, struct bstr *compute)
{
GL *gl = sc->gl;
@@ -941,7 +1008,7 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
mp_log_source(sc->log, MSGL_V, sc->text.start);
if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary)
- return compile_program(sc, vertex, frag);
+ return compile_program(sc, vertex, frag, compute);
// Try to load it from a disk cache, or compiling + saving it.
@@ -954,8 +1021,12 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
abort();
av_sha_init(sha, 256);
- av_sha_update(sha, vertex, strlen(vertex) + 1);
- av_sha_update(sha, frag, strlen(frag) + 1);
+ if (vertex)
+ av_sha_update(sha, vertex->start, vertex->len + 1);
+ if (frag)
+ av_sha_update(sha, frag->start, frag->len + 1);
+ if (compute)
+ av_sha_update(sha, compute->start, compute->len + 1);
// In theory, the array could change order, breaking old binaries.
for (int n = 0; sc->vertex_entries[n].name; n++) {
@@ -997,7 +1068,7 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
}
if (!prog) {
- prog = compile_program(sc, vertex, frag);
+ prog = compile_program(sc, vertex, frag, compute);
GLint size = 0;
gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
@@ -1040,7 +1111,8 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
// The return value is a mp_pass_perf containing performance metrics for the
// execution of the generated shader. (Note: execution is measured up until
// the corresponding gl_sc_reset call)
-struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
+// 'type' can be either GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
{
GL *gl = sc->gl;
@@ -1065,81 +1137,106 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
if (gl->mpgl_caps & MPGL_CAP_3D_TEX)
ADD(header, "precision mediump sampler3D;\n");
}
- ADD_BSTR(header, sc->prelude_text);
+
+ if (gl->glsl_version >= 130) {
+ ADD(header, "#define texture1D texture\n");
+ ADD(header, "#define texture3D texture\n");
+ } else {
+ ADD(header, "#define texture texture2D\n");
+ }
+
+ // Additional helpers.
+ ADD(header, "#define LUT_POS(x, lut_size)"
+ " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
+
char *vert_in = gl->glsl_version >= 130 ? "in" : "attribute";
char *vert_out = gl->glsl_version >= 130 ? "out" : "varying";
char *frag_in = gl->glsl_version >= 130 ? "in" : "varying";
- // vertex shader: we don't use the vertex shader, so just setup a dummy,
- // which passes through the vertex array attributes.
- bstr *vert_head = &sc->tmp[1];
- ADD_BSTR(vert_head, *header);
- bstr *vert_body = &sc->tmp[2];
- ADD(vert_body, "void main() {\n");
- bstr *frag_vaos = &sc->tmp[3];
- for (int n = 0; sc->vertex_entries[n].name; n++) {
- const struct gl_vao_entry *e = &sc->vertex_entries[n];
- const char *glsl_type = vao_glsl_type(e);
- if (strcmp(e->name, "position") == 0) {
- // setting raster pos. requires setting gl_Position magic variable
- assert(e->num_elems == 2 && e->type == GL_FLOAT);
- ADD(vert_head, "%s vec2 vertex_position;\n", vert_in);
- ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
+ struct bstr *vert = NULL, *frag = NULL, *comp = NULL;
+
+ if (type == GL_FRAGMENT_SHADER) {
+ // vertex shader: we don't use the vertex shader, so just setup a
+ // dummy, which passes through the vertex array attributes.
+ bstr *vert_head = &sc->tmp[1];
+ ADD_BSTR(vert_head, *header);
+ bstr *vert_body = &sc->tmp[2];
+ ADD(vert_body, "void main() {\n");
+ bstr *frag_vaos = &sc->tmp[3];
+ for (int n = 0; sc->vertex_entries[n].name; n++) {
+ const struct gl_vao_entry *e = &sc->vertex_entries[n];
+ const char *glsl_type = vao_glsl_type(e);
+ if (strcmp(e->name, "position") == 0) {
+ // setting raster pos. requires setting gl_Position magic variable
+ assert(e->num_elems == 2 && e->type == GL_FLOAT);
+ ADD(vert_head, "%s vec2 vertex_position;\n", vert_in);
+ ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
+ } else {
+ ADD(vert_head, "%s %s vertex_%s;\n", vert_in, glsl_type, e->name);
+ ADD(vert_head, "%s %s %s;\n", vert_out, glsl_type, e->name);
+ ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
+ ADD(frag_vaos, "%s %s %s;\n", frag_in, glsl_type, e->name);
+ }
+ }
+ ADD(vert_body, "}\n");
+ vert = vert_head;
+ ADD_BSTR(vert, *vert_body);
+
+ // fragment shader; still requires adding used uniforms and VAO elements
+ frag = &sc->tmp[4];
+ ADD_BSTR(frag, *header);
+ if (gl->glsl_version >= 130)
+ ADD(frag, "out vec4 out_color;\n");
+ ADD_BSTR(frag, *frag_vaos);
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name);
+ }
+
+ ADD_BSTR(frag, sc->prelude_text);
+ ADD_BSTR(frag, sc->header_text);
+
+ ADD(frag, "void main() {\n");
+ // we require _all_ frag shaders to write to a "vec4 color"
+ ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
+ ADD_BSTR(frag, sc->text);
+ if (gl->glsl_version >= 130) {
+ ADD(frag, "out_color = color;\n");
} else {
- ADD(vert_head, "%s %s vertex_%s;\n", vert_in, glsl_type, e->name);
- ADD(vert_head, "%s %s %s;\n", vert_out, glsl_type, e->name);
- ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
- ADD(frag_vaos, "%s %s %s;\n", frag_in, glsl_type, e->name);
+ ADD(frag, "gl_FragColor = color;\n");
}
+ ADD(frag, "}\n");
}
- ADD(vert_body, "}\n");
- bstr *vert = vert_head;
- ADD_BSTR(vert, *vert_body);
- // fragment shader; still requires adding used uniforms and VAO elements
- bstr *frag = &sc->tmp[4];
- ADD_BSTR(frag, *header);
- if (gl->glsl_version >= 130) {
- ADD(frag, "#define texture1D texture\n");
- ADD(frag, "#define texture3D texture\n");
- ADD(frag, "out vec4 out_color;\n");
- } else {
- ADD(frag, "#define texture texture2D\n");
- }
- ADD_BSTR(frag, *frag_vaos);
- for (int n = 0; n < sc->num_uniforms; n++) {
- struct sc_uniform *u = &sc->uniforms[n];
- ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name);
- }
+ if (type == GL_COMPUTE_SHADER) {
+ comp = &sc->tmp[4];
+ ADD_BSTR(comp, *header);
- // Additional helpers.
- ADD(frag, "#define LUT_POS(x, lut_size)"
- " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ ADD(comp, "uniform %s %s;\n", u->glsl_type, u->name);
+ }
- // custom shader header
- if (sc->header_text.len) {
- ADD(frag, "// header\n");
- ADD_BSTR(frag, sc->header_text);
- ADD(frag, "// body\n");
- }
- ADD(frag, "void main() {\n");
- // we require _all_ frag shaders to write to a "vec4 color"
- ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
- ADD_BSTR(frag, sc->text);
- if (gl->glsl_version >= 130) {
- ADD(frag, "out_color = color;\n");
- } else {
- ADD(frag, "gl_FragColor = color;\n");
+ ADD_BSTR(comp, sc->prelude_text);
+ ADD_BSTR(comp, sc->header_text);
+
+ ADD(comp, "void main() {\n");
+ ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience
+ ADD_BSTR(comp, sc->text);
+ ADD(comp, "}\n");
}
- ADD(frag, "}\n");
struct sc_entry *entry = NULL;
for (int n = 0; n < sc->num_entries; n++) {
struct sc_entry *cur = &sc->entries[n];
- if (bstr_equals(cur->frag, *frag) && bstr_equals(cur->vert, *vert)) {
- entry = cur;
- break;
- }
+ if (frag && !bstr_equals(cur->frag, *frag))
+ continue;
+ if (vert && !bstr_equals(cur->vert, *vert))
+ continue;
+ if (comp && !bstr_equals(cur->comp, *comp))
+ continue;
+ entry = cur;
+ break;
}
if (!entry) {
if (sc->num_entries == SC_MAX_ENTRIES)
@@ -1147,14 +1244,15 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
MP_TARRAY_GROW(sc, sc->entries, sc->num_entries);
entry = &sc->entries[sc->num_entries++];
*entry = (struct sc_entry){
- .vert = bstrdup(NULL, *vert),
- .frag = bstrdup(NULL, *frag),
+ .vert = vert ? bstrdup(NULL, *vert) : (struct bstr){0},
+ .frag = frag ? bstrdup(NULL, *frag) : (struct bstr){0},
+ .comp = comp ? bstrdup(NULL, *comp) : (struct bstr){0},
.timer = gl_timer_create(gl),
};
}
- // build vertex shader from vao and cache the locations of the uniform variables
+ // build shader program and cache the locations of the uniform variables
if (!entry->gl_shader) {
- entry->gl_shader = load_program(sc, vert->start, frag->start);
+ entry->gl_shader = load_program(sc, vert, frag, comp);
entry->num_uniforms = 0;
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_cached_uniform un = {
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 4327b74b8f..3dc7e5d72d 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -66,6 +66,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
#define FBOTEX_FUZZY_W 1
#define FBOTEX_FUZZY_H 2
#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H)
+#define FBOTEX_COMPUTE 4
void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter);
void fbotex_invalidate(struct fbotex *fbo);
@@ -141,9 +142,13 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
PRINTF_ATTRIBUTE(2, 3);
void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text);
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+ PRINTF_ATTRIBUTE(2, 3);
void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target,
GLuint texture);
void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture);
+void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture,
+ GLuint iformat, GLenum access);
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
@@ -156,7 +161,7 @@ void gl_sc_set_vertex_format(struct gl_shader_cache *sc,
const struct gl_vao_entry *entries,
size_t vertex_size);
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
-struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc);
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type);
void gl_sc_draw_data(struct gl_shader_cache *sc, GLenum prim, void *ptr,
size_t num);
void gl_sc_reset(struct gl_shader_cache *sc);
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 65b1d95849..ab8f311191 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -260,6 +260,7 @@ struct gl_video {
struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
int pass_tex_num;
int texture_w, texture_h;
+ int compute_w, compute_h; // presence indicates the use of a compute shader
struct gl_transform texture_offset; // texture transform without rotation
int components;
bool use_linear;
@@ -446,6 +447,7 @@ static void gl_video_setup_hooks(struct gl_video *p);
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
+#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
static struct bstr load_cached_file(struct gl_video *p, const char *path)
{
@@ -1107,6 +1109,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
char *texture_name = mp_tprintf(32, "texture%d", n);
char *texture_size = mp_tprintf(32, "texture_size%d", n);
char *texture_rot = mp_tprintf(32, "texture_rot%d", n);
+ char *texture_off = mp_tprintf(32, "texture_off%d", n);
char *pixel_size = mp_tprintf(32, "pixel_size%d", n);
if (gl_is_integer_format(s->gl_format)) {
@@ -1121,11 +1124,80 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
gl_sc_uniform_vec2(sc, texture_size, f);
gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
+ gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t);
gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
1.0f / f[1]});
}
}
+// Update the compute work group size requirements for the current shader.
+// Since we assume that all shaders can work with bigger working groups, just
+// never smaller ones, this effectively becomes the maximum of all size
+// requirements
+static void compute_size_minimum(struct gl_video *p, int bw, int bh)
+{
+ p->compute_w = MPMAX(p->compute_w, bw);
+ p->compute_h = MPMAX(p->compute_h, bh);
+}
+
+// w/h: the width/height of the compute shader's operating domain (e.g. the
+// target target that needs to be written, or the source texture that needs to
+// be reduced)
+// bw/bh: the width/height of the block (working group), which is tiled over
+// w/h as necessary
+static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
+{
+ GL *gl = p->gl;
+
+ PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh);
+
+ pass_prepare_src_tex(p);
+ gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
+
+ // Since we don't actually have vertices, we pretend for convenience
+ // reasons that we do and calculate the right texture coordinates based on
+ // the output sample ID
+ gl_sc_uniform_vec2(p->sc, "out_scale", (GLfloat[2]){ 1.0 / w, 1.0 / h });
+ PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");
+
+ for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
+ struct img_tex *s = &p->pass_tex[n];
+ if (!s->gl_tex)
+ continue;
+
+ // We need to rescale the coordinates to the true texture size
+ char tex_scale[32];
+ snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n);
+ gl_sc_uniform_vec2(p->sc, tex_scale, (GLfloat[2]){
+ (float)s->w / s->tex_w,
+ (float)s->h / s->tex_h,
+ });
+
+ PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n);
+ PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + "
+ "pixel_size%d * texture_off%d)\n", n, n, n, n, n);
+ // Clamp the texture coordinates to prevent sampling out-of-bounds in
+ // threads that exceed the requested width/height
+ PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n);
+ PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
+ }
+
+ pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER));
+
+ // always round up when dividing to make sure we don't leave off a part of
+ // the image
+ int num_x = (w + bw - 1) / bw,
+ num_y = (h + bh - 1) / bh;
+
+ gl->DispatchCompute(num_x, num_y, 1);
+ gl_sc_reset(p->sc);
+
+ debug_check_gl(p, "after dispatching compute shader");
+
+ memset(&p->pass_tex, 0, sizeof(p->pass_tex));
+ p->pass_tex_num = 0;
+}
+
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
const struct mp_rect *dst)
{
@@ -1169,7 +1241,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
GL *gl = p->gl;
pass_prepare_src_tex(p);
gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
- pass_record(p, gl_sc_generate(p->sc));
+ pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -1187,10 +1259,23 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
int w, int h, int flags)
{
+ bool use_compute = p->compute_w > 0 && p->compute_h > 0;
+ if (use_compute)
+ flags |= FBOTEX_COMPUTE;
+
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
- &(struct mp_rect){0, 0, w, h});
+ if (use_compute) {
+ gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture,
+ dst_fbo->iformat, GL_WRITE_ONLY);
+ GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
+ dispatch_compute(p, w, h, p->compute_w, p->compute_h);
+ } else {
+ finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
+ &(struct mp_rect){0, 0, w, h});
+ }
+
+ p->compute_w = p->compute_h = 0;
}
static const char *get_tex_swizzle(struct img_tex *img)
@@ -2479,7 +2564,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
pass_colormanage(p, csp_srgb, true);
}
- pass_record(p, gl_sc_generate(p->sc));
+ pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
mpgl_osd_draw_finish(p->osd, vp_w, vp_h, n, p->sc);
gl_sc_reset(p->sc);
}