From 02df9886ea239a8efa578677d44ee9b5f74be422 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 20 Jan 2015 21:24:29 +0100 Subject: vo_opengl: switch to nearest neighbour for trivial resampling This is significantly faster for FBOs on most modern GPUs, although it did not result in a huge difference for the video source texture on the sizes I tested. It might be more significant for 1080p or 4K content, so it's worth revisiting this in the future. It also renames SAMPLE_BILINEAR to SAMPLE_TRIVIAL to match the semantics. --- video/out/gl_video.c | 13 +++++++++++-- video/out/gl_video_shaders.glsl | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/video/out/gl_video.c b/video/out/gl_video.c index 4ab41d8076..6a34e9e743 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -601,6 +601,13 @@ static bool fbotex_init(struct gl_video *p, struct fbotex *fbo, int w, int h, GL_RGBA, GL_UNSIGNED_BYTE, NULL); default_tex_params(gl, p->gl_target); + // Convolution filters don't need linear sampling, so using nearest is + // often faster. + if (p->scalers[0].kernel) { + gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + debug_check_gl(p, "after creating framebuffer texture"); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); @@ -1215,13 +1222,13 @@ static void compile_shaders(struct gl_video *p) // Force using the luma scaler on chroma. If the "indirect" stage is // used, the actual scaling will happen in the next stage. shader_def(&header_conv, "SAMPLE_C", - use_indirect ? "SAMPLE_BILINEAR" : "SAMPLE_L"); + use_indirect ? "SAMPLE_TRIVIAL" : "SAMPLE_L"); } if (use_indirect) { // We don't use filtering for the Y-plane (luma), because it's never // scaled in this scenario. - shader_def(&header_conv, "SAMPLE_L", "SAMPLE_BILINEAR"); + shader_def(&header_conv, "SAMPLE_L", "SAMPLE_TRIVIAL"); shader_def_opt(&header_conv, "FIXED_SCALE", true); header_conv = t_concat(tmp, header, header_conv); p->indirect_program = @@ -1523,6 +1530,8 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; + + fbotex_uninit(p, &p->indirect_fbo); } void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index d3cbbb7c41..84c87173c3 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -201,7 +201,7 @@ vec4 sample_bilinear(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param return texture(tex, texcoord); } -#define SAMPLE_BILINEAR(p0, p1, p2) sample_bilinear(p0, p1, p2, 0.0) +#define SAMPLE_TRIVIAL(tex, texsize, texcoord) texture(tex, texcoord) // Explanation how bicubic scaling with only 4 texel fetches is done: // http://www.mate.tue.nl/mate/pdfs/10318.pdf -- cgit v1.2.3