summaryrefslogtreecommitdiffstats
path: root/video/out/opengl/video_shaders.c
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-07-05 00:25:32 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-05 11:21:58 +0200
commitad0d6caac76a0cff9e98912314e749c1fde32d98 (patch)
treee2f4cd6aff87503e05cbca1df4852a11f827f002 /video/out/opengl/video_shaders.c
parentb387f82aa40a1655590bbb0dffe58f9728a37b49 (diff)
downloadmpv-ad0d6caac76a0cff9e98912314e749c1fde32d98.tar.bz2
mpv-ad0d6caac76a0cff9e98912314e749c1fde32d98.tar.xz
vo_opengl: use textureGatherOffset for polar filters
This is more efficient on my machine (nvidia), but only when applied to groups of exactly 4 texels. So we switch to the more efficient textureGather for groups of 4. Some notes: - textureGatherOffset seems to be faster than textureGather by a non-negligible amount, but for some reason, textureOffset is still slower than a straight-up texture - textureGather* requires GLSL 400; and at least on nvidia, this requires actually allocating a GL 4.0 context. - the code in opengl/common.c that clamped the GLSL version to 330 is deprecated, because the old user shader style has been removed completely in the meantime - To combat the growing complexity of the polar sampling code, we drop the antiringing functionality from EWA shaders completely, since it never really worked well for EWA to begin with. (Horrific artifacting)
Diffstat (limited to 'video/out/opengl/video_shaders.c')
-rw-r--r--video/out/opengl/video_shaders.c124
1 files changed, 84 insertions, 40 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 9ed85ffa09..cbd566ff0c 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -105,62 +105,106 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
GLSLF("}\n");
}
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
+// Subroutine for computing and adding an individual texel contribution
+// If subtexel < 0, samples directly. Otherwise, takes the texel from cN[comp]
+static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
+ int x, int y, int subtexel, int components)
{
double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
double radius_cutoff = scaler->kernel->radius_cutoff;
- int bound = ceil(radius_cutoff);
- bool use_ar = scaler->conf.antiring > 0;
+ // Since we can't know the subpixel position in advance, assume a
+ // worst case scenario
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double dmax = sqrt(xx*xx + yy*yy);
+ // Skip samples definitely outside the radius
+ if (dmax >= radius_cutoff)
+ return;
+ GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
+ // Check for samples that might be skippable
+ bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+ if (maybe_skippable)
+ GLSLF("if (d < %f) {\n", radius_cutoff / radius);
+
+ // get the weight for this pixel
+ if (scaler->gl_target == GL_TEXTURE_1D) {
+ GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
+ scaler->lut_size);
+ } else {
+ GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
+ scaler->lut_size);
+ }
+ GLSL(wsum += w;)
+
+ if (subtexel < 0) {
+ GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
+ GLSL(color += vec4(w) * c0;)
+ } else {
+ for (int n = 0; n < components; n++)
+ GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel);
+ }
+
+ if (maybe_skippable)
+ GLSLF("}\n");
+}
+
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+ int components, int glsl_version)
+{
GLSL(color = vec4(0.0);)
GLSLF("{\n");
GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
GLSL(vec2 base = pos - fcoord * pt;)
- GLSL(vec4 c;)
GLSLF("float w, d, wsum = 0.0;\n");
- if (use_ar) {
- GLSL(vec4 lo = vec4(1.0);)
- GLSL(vec4 hi = vec4(0.0);)
- }
+ for (int n = 0; n < components; n++)
+ GLSLF("vec4 c%d;\n", n);
+
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
+
GLSLF("// scaler samples\n");
- for (int y = 1-bound; y <= bound; y++) {
- for (int x = 1-bound; x <= bound; x++) {
- // Since we can't know the subpixel position in advance, assume a
- // worst case scenario
- int yy = y > 0 ? y-1 : y;
- int xx = x > 0 ? x-1 : x;
- double dmax = sqrt(xx*xx + yy*yy);
- // Skip samples definitely outside the radius
- if (dmax >= radius_cutoff)
- continue;
- GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
- // Check for samples that might be skippable
- bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
- if (maybe_skippable)
- GLSLF("if (d < %f) {\n", radius_cutoff / radius);
- if (scaler->gl_target == GL_TEXTURE_1D) {
- GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
- scaler->lut_size);
+ int bound = ceil(scaler->kernel->radius_cutoff);
+ for (int y = 1-bound; y <= bound; y += 2) {
+ for (int x = 1-bound; x <= bound; x += 2) {
+ // First we figure out whether it's more efficient to use direct
+ // sampling or gathering. The problem is that gathering 4 texels
+ // only to discard some of them is very wasteful, so only do it if
+ // we suspect it will be a win rather than a loss. This is the case
+ // exactly when all four texels are within bounds
+ bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
+
+ // textureGather is only supported in GLSL 400+
+ if (glsl_version < 400)
+ use_gather = false;
+
+ if (use_gather) {
+ // Gather the four surrounding texels simultaneously
+ for (int n = 0; n < components; n++) {
+ GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n",
+ n, x, y, n);
+ }
+
+ // Mix in all of the points with their weights
+ for (int p = 0; p < 4; p++) {
+ // The four texels are gathered counterclockwise starting
+ // from the bottom left
+ static const int xo[4] = {0, 1, 1, 0};
+ static const int yo[4] = {1, 1, 0, 0};
+ if (x+xo[p] > bound || y+yo[p] > bound)
+ continue;
+ polar_sample(sc, scaler, x+xo[p], y+yo[p], p, components);
+ }
} else {
- GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
- scaler->lut_size);
+ // switch to direct sampling instead, for efficiency/compatibility
+ for (int yy = y; yy <= bound && yy <= y+1; yy++) {
+ for (int xx = x; xx <= bound && xx <= x+1; xx++)
+ polar_sample(sc, scaler, xx, yy, -1, components);
+ }
}
- GLSL(wsum += w;)
- GLSLF("c = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
- GLSL(color += vec4(w) * c;)
- if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
- GLSL(lo = min(lo, c);)
- GLSL(hi = max(hi, c);)
- }
- if (maybe_skippable)
- GLSLF("}\n");
}
}
+
GLSL(color = color / vec4(wsum);)
- if (use_ar)
- GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
- scaler->conf.antiring);
GLSLF("}\n");
}