summary | refs | log | tree | commit | diff | stats
path: root/video
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2017-07-05 00:25:32 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-07-05 11:21:58 +0200
commit: ad0d6caac76a0cff9e98912314e749c1fde32d98 (patch)
tree: e2f4cd6aff87503e05cbca1df4852a11f827f002 /video
parent: b387f82aa40a1655590bbb0dffe58f9728a37b49 (diff)
download: mpv-ad0d6caac76a0cff9e98912314e749c1fde32d98.tar.bz2
mpv-ad0d6caac76a0cff9e98912314e749c1fde32d98.tar.xz
vo_opengl: use textureGatherOffset for polar filters
This is more efficient on my machine (nvidia), but only when applied to groups of exactly 4 texels. So we switch to the more efficient textureGather for groups of 4.

Some notes:
- textureGatherOffset seems to be faster than textureGather by a non-negligible amount, but for some reason, textureOffset is still slower than a straight-up texture.
- textureGather* requires GLSL 400; and at least on nvidia, this requires actually allocating a GL 4.0 context.
- The code in opengl/common.c that clamped the GLSL version to 330 is obsolete, because the old user shader style has been removed completely in the meantime.
- To combat the growing complexity of the polar sampling code, we drop the antiringing functionality from EWA shaders completely, since it never really worked well for EWA to begin with (horrific artifacting).
Diffstat (limited to 'video')
-rw-r--r-- video/out/opengl/common.c 4
-rw-r--r-- video/out/opengl/context.c 1
-rw-r--r-- video/out/opengl/video.c 2
-rw-r--r-- video/out/opengl/video_shaders.c 124
-rw-r--r-- video/out/opengl/video_shaders.h 3
5 files changed, 90 insertions, 44 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index fc0ec547db..6913b77433 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -579,8 +579,8 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
int glsl_major = 0, glsl_minor = 0;
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor;
- // GLSL 400 defines "sample" as keyword - breaks custom shaders.
- gl->glsl_version = MPMIN(gl->glsl_version, 330);
+ // restrict GLSL version to be forwards compatible
+ gl->glsl_version = MPMIN(gl->glsl_version, 400);
}
if (is_software_gl(gl)) {
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index a3b92ac8b4..20b16b73ef 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -92,6 +92,7 @@ static const struct mpgl_driver *const backends[] = {
// 0-terminated list of desktop GL versions a backend should try to
// initialize. The first entry is the most preferred version.
const int mpgl_preferred_gl_versions[] = {
+ 400,
330,
320,
310,
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 9823f1066d..a83e6e34b7 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1583,7 +1583,7 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
} else if (strcmp(name, "oversample") == 0) {
pass_sample_oversample(p->sc, scaler, w, h);
} else if (scaler->kernel && scaler->kernel->polar) {
- pass_sample_polar(p->sc, scaler);
+ pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version);
} else if (scaler->kernel) {
pass_sample_separated(p, tex, scaler, w, h);
} else {
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 9ed85ffa09..cbd566ff0c 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -105,62 +105,106 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
GLSLF("}\n");
}
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
+// Subroutine for computing and adding an individual texel contribution
+// If subtexel < 0, samples directly. Otherwise, takes the texel from cN[comp]
+static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
+ int x, int y, int subtexel, int components)
{
double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
double radius_cutoff = scaler->kernel->radius_cutoff;
- int bound = ceil(radius_cutoff);
- bool use_ar = scaler->conf.antiring > 0;
+ // Since we can't know the subpixel position in advance, assume a
+ // worst case scenario
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double dmax = sqrt(xx*xx + yy*yy);
+ // Skip samples definitely outside the radius
+ if (dmax >= radius_cutoff)
+ return;
+ GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
+ // Check for samples that might be skippable
+ bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+ if (maybe_skippable)
+ GLSLF("if (d < %f) {\n", radius_cutoff / radius);
+
+ // get the weight for this pixel
+ if (scaler->gl_target == GL_TEXTURE_1D) {
+ GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
+ scaler->lut_size);
+ } else {
+ GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
+ scaler->lut_size);
+ }
+ GLSL(wsum += w;)
+
+ if (subtexel < 0) {
+ GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
+ GLSL(color += vec4(w) * c0;)
+ } else {
+ for (int n = 0; n < components; n++)
+ GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel);
+ }
+
+ if (maybe_skippable)
+ GLSLF("}\n");
+}
+
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+ int components, int glsl_version)
+{
GLSL(color = vec4(0.0);)
GLSLF("{\n");
GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
GLSL(vec2 base = pos - fcoord * pt;)
- GLSL(vec4 c;)
GLSLF("float w, d, wsum = 0.0;\n");
- if (use_ar) {
- GLSL(vec4 lo = vec4(1.0);)
- GLSL(vec4 hi = vec4(0.0);)
- }
+ for (int n = 0; n < components; n++)
+ GLSLF("vec4 c%d;\n", n);
+
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
+
GLSLF("// scaler samples\n");
- for (int y = 1-bound; y <= bound; y++) {
- for (int x = 1-bound; x <= bound; x++) {
- // Since we can't know the subpixel position in advance, assume a
- // worst case scenario
- int yy = y > 0 ? y-1 : y;
- int xx = x > 0 ? x-1 : x;
- double dmax = sqrt(xx*xx + yy*yy);
- // Skip samples definitely outside the radius
- if (dmax >= radius_cutoff)
- continue;
- GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
- // Check for samples that might be skippable
- bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
- if (maybe_skippable)
- GLSLF("if (d < %f) {\n", radius_cutoff / radius);
- if (scaler->gl_target == GL_TEXTURE_1D) {
- GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
- scaler->lut_size);
+ int bound = ceil(scaler->kernel->radius_cutoff);
+ for (int y = 1-bound; y <= bound; y += 2) {
+ for (int x = 1-bound; x <= bound; x += 2) {
+ // First we figure out whether it's more efficient to use direct
+ // sampling or gathering. The problem is that gathering 4 texels
+ // only to discard some of them is very wasteful, so only do it if
+ // we suspect it will be a win rather than a loss. This is the case
+ // exactly when all four texels are within bounds
+ bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
+
+ // textureGather is only supported in GLSL 400+
+ if (glsl_version < 400)
+ use_gather = false;
+
+ if (use_gather) {
+ // Gather the four surrounding texels simultaneously
+ for (int n = 0; n < components; n++) {
+ GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n",
+ n, x, y, n);
+ }
+
+ // Mix in all of the points with their weights
+ for (int p = 0; p < 4; p++) {
+ // The four texels are gathered counterclockwise starting
+ // from the bottom left
+ static const int xo[4] = {0, 1, 1, 0};
+ static const int yo[4] = {1, 1, 0, 0};
+ if (x+xo[p] > bound || y+yo[p] > bound)
+ continue;
+ polar_sample(sc, scaler, x+xo[p], y+yo[p], p, components);
+ }
} else {
- GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
- scaler->lut_size);
+ // switch to direct sampling instead, for efficiency/compatibility
+ for (int yy = y; yy <= bound && yy <= y+1; yy++) {
+ for (int xx = x; xx <= bound && xx <= x+1; xx++)
+ polar_sample(sc, scaler, xx, yy, -1, components);
+ }
}
- GLSL(wsum += w;)
- GLSLF("c = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
- GLSL(color += vec4(w) * c;)
- if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
- GLSL(lo = min(lo, c);)
- GLSL(hi = max(hi, c);)
- }
- if (maybe_skippable)
- GLSLF("}\n");
}
}
+
GLSL(color = color / vec4(wsum);)
- if (use_ar)
- GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
- scaler->conf.antiring);
GLSLF("}\n");
}
diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h
index 207824b169..6498033ad1 100644
--- a/video/out/opengl/video_shaders.h
+++ b/video/out/opengl/video_shaders.h
@@ -30,7 +30,8 @@ extern const struct m_sub_options deband_conf;
void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
int d_x, int d_y);
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler);
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+ int components, int glsl_version);
void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
int w, int h);