summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--video/out/opengl/common.c9
-rw-r--r--video/out/opengl/common.h2
-rw-r--r--video/out/opengl/utils.c7
-rw-r--r--video/out/opengl/video.c11
-rw-r--r--video/out/opengl/video_shaders.c1
5 files changed, 26 insertions, 4 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 9af21856ab..c7a714817a 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -357,6 +357,11 @@ static const struct gl_functions gl_functions[] = {
{0},
},
},
+ {
+ .ver_core = 430,
+ .extension = "GL_ARB_arrays_of_arrays",
+ .provides = MPGL_CAP_NESTED_ARRAY,
+ },
// Swap control, always an OS specific extension
// The OSX code loads this manually.
{
@@ -619,6 +624,10 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
mp_verbose(log, "Detected suspected software renderer.\n");
}
+ // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
+ if (gl->DispatchCompute && gl->BindImageTexture)
+ gl->mpgl_caps |= MPGL_CAP_COMPUTE_SHADER;
+
// Provided for simpler handling if no framebuffer support is available.
if (!gl->BindFramebuffer)
gl->BindFramebuffer = &dummy_glBindFramebuffer;
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index eec7806624..6d8015c8b3 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -55,6 +55,8 @@ enum {
MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float
MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float
MPGL_CAP_SSBO = (1 << 21), // GL_ARB_shader_storage_buffer_object
+ MPGL_CAP_COMPUTE_SHADER = (1 << 22), // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
+ MPGL_CAP_NESTED_ARRAY = (1 << 23), // GL_ARB_arrays_of_arrays
MPGL_CAP_SW = (1 << 30), // indirect or sw renderer
};
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index afbd6f65af..451010fffa 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -777,6 +777,8 @@ void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint textur
void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo,
char *format, ...)
{
+ gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
+
struct sc_buffer *b = find_buffer(sc, name);
b->binding = sc->next_buffer_binding++;
b->ssbo = ssbo;
@@ -1179,6 +1181,11 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
// set up shader text (header + uniforms + body)
bstr *header = &sc->tmp[0];
ADD(header, "#version %d%s\n", gl->glsl_version, gl->es >= 300 ? " es" : "");
+ if (type == GL_COMPUTE_SHADER) {
+ // This extension cannot be enabled in a fragment shader, so enable it
+ // here as a special case for compute shaders only.
+ ADD(header, "#extension GL_ARB_compute_shader : enable\n");
+ }
for (int n = 0; n < sc->num_exts; n++)
ADD(header, "#extension %s : enable\n", sc->exts[n]);
if (gl->es) {
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index d4f746e3a2..e1fd60646a 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1183,7 +1183,7 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
// Clamp the texture coordinates to prevent sampling out-of-bounds in
// threads that exceed the requested width/height
PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n);
- PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
+ PRELUDE("vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
}
pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER));
@@ -1756,10 +1756,12 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
} else if (strcmp(name, "oversample") == 0) {
pass_sample_oversample(p->sc, scaler, w, h);
} else if (scaler->kernel && scaler->kernel->polar) {
+ bool use_compute_polar = (p->gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER) &&
+ (p->gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY);
// Use a compute shader where possible, fallback to the slower texture
// fragment sampler otherwise. Also use the fragment shader for
// very large kernels to avoid exhausting shmem
- if (p->gl->glsl_version < 430 || scaler->kernel->f.radius > 16) {
+ if (!use_compute_polar || scaler->kernel->f.radius > 16) {
pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version);
} else {
// For performance we want to load at least as many pixels
@@ -3391,7 +3393,8 @@ static void check_gl_features(struct gl_video *p)
bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.)
bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG;
bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16);
- bool have_compute = gl->glsl_version >= 430; // easiest way to ensure all
+ bool have_compute = gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER;
+ bool have_ssbo = gl->mpgl_caps & MPGL_CAP_SSBO;
const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2,
GL_RGBA8, 0};
@@ -3502,7 +3505,7 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
- if (!have_compute && p->opts.compute_hdr_peak) {
+ if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) {
p->opts.compute_hdr_peak = 0;
MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
}
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index fe6e944168..854c829f1d 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -241,6 +241,7 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
// Load all relevant texels into shmem
+ gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays");
for (int c = 0; c < components; c++)
GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw);