From 02df9886ea239a8efa578677d44ee9b5f74be422 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@nand.wakku.to>
Date: Tue, 20 Jan 2015 21:24:29 +0100
Subject: vo_opengl: switch to nearest neighbour for trivial resampling

Nearest-neighbour sampling is significantly faster for FBOs on most
modern GPUs, although it did not make a big difference for the video
source texture at the sizes I tested. It might be more significant for
1080p or 4K content, so it's worth revisiting this in the future.

This commit also renames SAMPLE_BILINEAR to SAMPLE_TRIVIAL to match the
new semantics.
---
 video/out/gl_video.c            | 13 +++++++++++--
 video/out/gl_video_shaders.glsl |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 4ab41d8076..6a34e9e743 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -601,6 +601,13 @@ static bool fbotex_init(struct gl_video *p, struct fbotex *fbo, int w, int h,
                    GL_RGBA, GL_UNSIGNED_BYTE, NULL);
     default_tex_params(gl, p->gl_target);
 
+    // Convolution filters don't need linear sampling, so using nearest is
+    // often faster.
+    if (p->scalers[0].kernel) {
+        gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    }
+
     debug_check_gl(p, "after creating framebuffer texture");
 
     gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo);
@@ -1215,13 +1222,13 @@ static void compile_shaders(struct gl_video *p)
         // Force using the luma scaler on chroma. If the "indirect" stage is
         // used, the actual scaling will happen in the next stage.
         shader_def(&header_conv, "SAMPLE_C",
-                   use_indirect ? "SAMPLE_BILINEAR" : "SAMPLE_L");
+                   use_indirect ? "SAMPLE_TRIVIAL" : "SAMPLE_L");
     }
 
     if (use_indirect) {
         // We don't use filtering for the Y-plane (luma), because it's never
         // scaled in this scenario.
-        shader_def(&header_conv, "SAMPLE_L", "SAMPLE_BILINEAR");
+        shader_def(&header_conv, "SAMPLE_L", "SAMPLE_TRIVIAL");
         shader_def_opt(&header_conv, "FIXED_SCALE", true);
         header_conv = t_concat(tmp, header, header_conv);
         p->indirect_program =
@@ -1523,6 +1530,8 @@ static void uninit_rendering(struct gl_video *p)
 
     gl->DeleteTextures(1, &p->dither_texture);
     p->dither_texture = 0;
+
+    fbotex_uninit(p, &p->indirect_fbo);
 }
 
 void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index d3cbbb7c41..84c87173c3 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -201,7 +201,7 @@ vec4 sample_bilinear(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param
     return texture(tex, texcoord);
 }
 
-#define SAMPLE_BILINEAR(p0, p1, p2) sample_bilinear(p0, p1, p2, 0.0)
+#define SAMPLE_TRIVIAL(tex, texsize, texcoord) texture(tex, texcoord)
 
 // Explanation how bicubic scaling with only 4 texel fetches is done:
 //   http://www.mate.tue.nl/mate/pdfs/10318.pdf
-- 
cgit v1.2.3