summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Niklas Haas <git@nand.wakku.to> 2015-01-20 21:24:29 +0100
committer Niklas Haas <git@nand.wakku.to> 2015-01-22 19:40:04 +0100
commit 02df9886ea239a8efa578677d44ee9b5f74be422 (patch)
tree 276f75d9a92524e91949fce7425826b090d88731
parent f24c2e0f56fdcef9b14c4a2ed15c4e9e801bbeab (diff)
download mpv-02df9886ea239a8efa578677d44ee9b5f74be422.tar.bz2
mpv-02df9886ea239a8efa578677d44ee9b5f74be422.tar.xz
vo_opengl: switch to nearest neighbour for trivial resampling
This is significantly faster for FBOs on most modern GPUs, although it did not result in a huge difference for the video source texture on the sizes I tested. It might be more significant for 1080p or 4K content, so it's worth revisiting this in the future. It also renames SAMPLE_BILINEAR to SAMPLE_TRIVIAL to match the semantics.
-rw-r--r-- video/out/gl_video.c 13
-rw-r--r-- video/out/gl_video_shaders.glsl 2
2 files changed, 12 insertions, 3 deletions
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 4ab41d8076..6a34e9e743 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -601,6 +601,13 @@ static bool fbotex_init(struct gl_video *p, struct fbotex *fbo, int w, int h,
GL_RGBA, GL_UNSIGNED_BYTE, NULL);
default_tex_params(gl, p->gl_target);
+ // Convolution filters don't need linear sampling, so using nearest is
+ // often faster.
+ if (p->scalers[0].kernel) {
+ gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ }
+
debug_check_gl(p, "after creating framebuffer texture");
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo);
@@ -1215,13 +1222,13 @@ static void compile_shaders(struct gl_video *p)
// Force using the luma scaler on chroma. If the "indirect" stage is
// used, the actual scaling will happen in the next stage.
shader_def(&header_conv, "SAMPLE_C",
- use_indirect ? "SAMPLE_BILINEAR" : "SAMPLE_L");
+ use_indirect ? "SAMPLE_TRIVIAL" : "SAMPLE_L");
}
if (use_indirect) {
// We don't use filtering for the Y-plane (luma), because it's never
// scaled in this scenario.
- shader_def(&header_conv, "SAMPLE_L", "SAMPLE_BILINEAR");
+ shader_def(&header_conv, "SAMPLE_L", "SAMPLE_TRIVIAL");
shader_def_opt(&header_conv, "FIXED_SCALE", true);
header_conv = t_concat(tmp, header, header_conv);
p->indirect_program =
@@ -1523,6 +1530,8 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteTextures(1, &p->dither_texture);
p->dither_texture = 0;
+
+ fbotex_uninit(p, &p->indirect_fbo);
}
void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index d3cbbb7c41..84c87173c3 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -201,7 +201,7 @@ vec4 sample_bilinear(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param
return texture(tex, texcoord);
}
-#define SAMPLE_BILINEAR(p0, p1, p2) sample_bilinear(p0, p1, p2, 0.0)
+#define SAMPLE_TRIVIAL(tex, texsize, texcoord) texture(tex, texcoord)
// Explanation how bicubic scaling with only 4 texel fetches is done:
// http://www.mate.tue.nl/mate/pdfs/10318.pdf