summaryrefslogtreecommitdiffstats
path: root/video/out
diff options
context:
space:
mode:
Diffstat (limited to 'video/out')
-rw-r--r--video/out/opengl/common.c4
-rw-r--r--video/out/opengl/context.c1
-rw-r--r--video/out/opengl/video.c2
-rw-r--r--video/out/opengl/video_shaders.c124
-rw-r--r--video/out/opengl/video_shaders.h3
5 files changed, 90 insertions, 44 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index fc0ec547db..6913b77433 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -579,8 +579,8 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
int glsl_major = 0, glsl_minor = 0;
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor;
- // GLSL 400 defines "sample" as keyword - breaks custom shaders.
- gl->glsl_version = MPMIN(gl->glsl_version, 330);
+ // restrict GLSL version to be forwards compatible
+ gl->glsl_version = MPMIN(gl->glsl_version, 400);
}
if (is_software_gl(gl)) {
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index a3b92ac8b4..20b16b73ef 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -92,6 +92,7 @@ static const struct mpgl_driver *const backends[] = {
// 0-terminated list of desktop GL versions a backend should try to
// initialize. The first entry is the most preferred version.
const int mpgl_preferred_gl_versions[] = {
+ 400,
330,
320,
310,
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 9823f1066d..a83e6e34b7 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1583,7 +1583,7 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
} else if (strcmp(name, "oversample") == 0) {
pass_sample_oversample(p->sc, scaler, w, h);
} else if (scaler->kernel && scaler->kernel->polar) {
- pass_sample_polar(p->sc, scaler);
+ pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version);
} else if (scaler->kernel) {
pass_sample_separated(p, tex, scaler, w, h);
} else {
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 9ed85ffa09..cbd566ff0c 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -105,62 +105,106 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
GLSLF("}\n");
}
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
+// Subroutine for computing and adding an individual texel contribution
+// If subtexel < 0, samples directly. Otherwise, takes the texel from cN[comp]
+static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
+ int x, int y, int subtexel, int components)
{
double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
double radius_cutoff = scaler->kernel->radius_cutoff;
- int bound = ceil(radius_cutoff);
- bool use_ar = scaler->conf.antiring > 0;
+ // Since we can't know the subpixel position in advance, assume a
+ // worst case scenario
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double dmax = sqrt(xx*xx + yy*yy);
+ // Skip samples definitely outside the radius
+ if (dmax >= radius_cutoff)
+ return;
+ GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
+ // Check for samples that might be skippable
+ bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+ if (maybe_skippable)
+ GLSLF("if (d < %f) {\n", radius_cutoff / radius);
+
+ // get the weight for this pixel
+ if (scaler->gl_target == GL_TEXTURE_1D) {
+ GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
+ scaler->lut_size);
+ } else {
+ GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
+ scaler->lut_size);
+ }
+ GLSL(wsum += w;)
+
+ if (subtexel < 0) {
+ GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
+ GLSL(color += vec4(w) * c0;)
+ } else {
+ for (int n = 0; n < components; n++)
+ GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel);
+ }
+
+ if (maybe_skippable)
+ GLSLF("}\n");
+}
+
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+ int components, int glsl_version)
+{
GLSL(color = vec4(0.0);)
GLSLF("{\n");
GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
GLSL(vec2 base = pos - fcoord * pt;)
- GLSL(vec4 c;)
GLSLF("float w, d, wsum = 0.0;\n");
- if (use_ar) {
- GLSL(vec4 lo = vec4(1.0);)
- GLSL(vec4 hi = vec4(0.0);)
- }
+ for (int n = 0; n < components; n++)
+ GLSLF("vec4 c%d;\n", n);
+
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
+
GLSLF("// scaler samples\n");
- for (int y = 1-bound; y <= bound; y++) {
- for (int x = 1-bound; x <= bound; x++) {
- // Since we can't know the subpixel position in advance, assume a
- // worst case scenario
- int yy = y > 0 ? y-1 : y;
- int xx = x > 0 ? x-1 : x;
- double dmax = sqrt(xx*xx + yy*yy);
- // Skip samples definitely outside the radius
- if (dmax >= radius_cutoff)
- continue;
- GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
- // Check for samples that might be skippable
- bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
- if (maybe_skippable)
- GLSLF("if (d < %f) {\n", radius_cutoff / radius);
- if (scaler->gl_target == GL_TEXTURE_1D) {
- GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
- scaler->lut_size);
+ int bound = ceil(scaler->kernel->radius_cutoff);
+ for (int y = 1-bound; y <= bound; y += 2) {
+ for (int x = 1-bound; x <= bound; x += 2) {
+ // First we figure out whether it's more efficient to use direct
+ // sampling or gathering. The problem is that gathering 4 texels
+ // only to discard some of them is very wasteful, so only do it if
+ // we suspect it will be a win rather than a loss. This is the case
+ // exactly when all four texels are within bounds
+ bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
+
+ // textureGather is only supported in GLSL 400+
+ if (glsl_version < 400)
+ use_gather = false;
+
+ if (use_gather) {
+ // Gather the four surrounding texels simultaneously
+ for (int n = 0; n < components; n++) {
+ GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n",
+ n, x, y, n);
+ }
+
+ // Mix in all of the points with their weights
+ for (int p = 0; p < 4; p++) {
+ // The four texels are gathered counterclockwise starting
+ // from the bottom left
+ static const int xo[4] = {0, 1, 1, 0};
+ static const int yo[4] = {1, 1, 0, 0};
+ if (x+xo[p] > bound || y+yo[p] > bound)
+ continue;
+ polar_sample(sc, scaler, x+xo[p], y+yo[p], p, components);
+ }
} else {
- GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
- scaler->lut_size);
+ // switch to direct sampling instead, for efficiency/compatibility
+ for (int yy = y; yy <= bound && yy <= y+1; yy++) {
+ for (int xx = x; xx <= bound && xx <= x+1; xx++)
+ polar_sample(sc, scaler, xx, yy, -1, components);
+ }
}
- GLSL(wsum += w;)
- GLSLF("c = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
- GLSL(color += vec4(w) * c;)
- if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
- GLSL(lo = min(lo, c);)
- GLSL(hi = max(hi, c);)
- }
- if (maybe_skippable)
- GLSLF("}\n");
}
}
+
GLSL(color = color / vec4(wsum);)
- if (use_ar)
- GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
- scaler->conf.antiring);
GLSLF("}\n");
}
diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h
index 207824b169..6498033ad1 100644
--- a/video/out/opengl/video_shaders.h
+++ b/video/out/opengl/video_shaders.h
@@ -30,7 +30,8 @@ extern const struct m_sub_options deband_conf;
void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
int d_x, int d_y);
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler);
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+ int components, int glsl_version);
void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
int w, int h);