From 4a95be014b8875567e8e800a6075d876ba824081 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 8 Dec 2014 15:23:21 +0100 Subject: vo_opengl: never use 1D textures for lookup textures This was a micro-optimization for small filters which need 4 or fewer weights per sample point. When I originally wrote this code, using a 1D texture seemed to give a slight speed gain, but now I couldn't measure any difference. Remove this to simplify the code. --- video/out/gl_video.c | 27 ++++++-------------- video/out/gl_video_shaders.glsl | 56 ++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 48 deletions(-) (limited to 'video') diff --git a/video/out/gl_video.c b/video/out/gl_video.c index efc1b7b9a7..124d949a03 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -1176,39 +1176,28 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler) int size = scaler->kernel->size; assert(size < FF_ARRAY_ELEMS(lut_tex_formats)); const struct lut_tex_format *fmt = &lut_tex_formats[size]; - bool use_2d = fmt->pixels > 1; bool is_luma = scaler->index == 0; - scaler->lut_name = use_2d - ? (is_luma ? "lut_l_2d" : "lut_c_2d") - : (is_luma ? "lut_l_1d" : "lut_c_1d"); + scaler->lut_name = is_luma ? "lut_l" : "lut_c"; gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index); - GLenum target = use_2d ? 
GL_TEXTURE_2D : GL_TEXTURE_1D; if (!scaler->gl_lut) gl->GenTextures(1, &scaler->gl_lut); - gl->BindTexture(target, scaler->gl_lut); + gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut); gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size); mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); - if (use_2d) { - gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, fmt->pixels, - LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, - weights); - } else { - gl->TexImage1D(GL_TEXTURE_1D, 0, fmt->internal_format, - LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, - weights); - } + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, fmt->pixels, + LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights); talloc_free(weights); - gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gl->ActiveTexture(GL_TEXTURE0); diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index f6b84fc00c..b8e30e175f 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -154,10 +154,8 @@ uniform VIDEO_SAMPLER texture3; uniform vec2 textures_size[4]; uniform vec2 chroma_center_offset; uniform vec2 chroma_div; -uniform sampler1D lut_c_1d; -uniform sampler1D lut_l_1d; -uniform sampler2D lut_c_2d; -uniform sampler2D lut_l_2d; +uniform sampler2D lut_c; +uniform sampler2D lut_l; uniform sampler3D lut_3d; uniform sampler2D dither; uniform mat4x3 colormatrix; @@ -221,13 
+219,13 @@ vec4 sample_bicubic_fast(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float p return mix(aa, ab, parmx.b); } -float[2] weights2(sampler1D lookup, float f) { - vec4 c = texture1D(lookup, f); +float[2] weights2(sampler2D lookup, float f) { + vec4 c = texture(lookup, vec2(0.5, f)); return float[2](c.r, c.g); } -float[4] weights4(sampler1D lookup, float f) { - vec4 c = texture1D(lookup, f); +float[4] weights4(sampler2D lookup, float f) { + vec4 c = texture(lookup, vec2(0.5, f)); return float[4](c.r, c.g, c.b, c.a); } @@ -259,8 +257,8 @@ WEIGHTS_N(weights64, 64) // The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to // remove all the redundant multiplications and additions. -#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, SAMPLERT, WEIGHTS_FUNC) \ - vec4 NAME(vec2 dir, SAMPLERT lookup, VIDEO_SAMPLER tex, vec2 texsize, \ +#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, WEIGHTS_FUNC) \ + vec4 NAME(vec2 dir, sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize, \ vec2 texcoord) { \ vec2 pt = (1 / texsize) * dir; \ float fcoord = dot(fract(texcoord * texsize - 0.5), dir); \ @@ -273,17 +271,17 @@ WEIGHTS_N(weights64, 64) return res; \ } -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, sampler1D, weights2) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, sampler1D, weights4) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, sampler2D, weights6) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, sampler2D, weights8) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, sampler2D, weights12) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, sampler2D, weights16) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep32, 32, sampler2D, weights32) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, sampler2D, weights64) - -#define SAMPLE_CONVOLUTION_N(NAME, N, SAMPLERT, WEIGHTS_FUNC) \ - vec4 NAME(SAMPLERT lookup, VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {\ +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, weights2) 
+SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, weights4) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, weights6) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, weights8) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, weights12) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, weights16) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep32, 32, weights32) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64) + +#define SAMPLE_CONVOLUTION_N(NAME, N, WEIGHTS_FUNC) \ + vec4 NAME(sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {\ vec2 pt = 1 / texsize; \ vec2 fcoord = fract(texcoord * texsize - 0.5); \ vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1); \ @@ -299,14 +297,14 @@ SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, sampler2D, weights64) return res; \ } -SAMPLE_CONVOLUTION_N(sample_convolution2, 2, sampler1D, weights2) -SAMPLE_CONVOLUTION_N(sample_convolution4, 4, sampler1D, weights4) -SAMPLE_CONVOLUTION_N(sample_convolution6, 6, sampler2D, weights6) -SAMPLE_CONVOLUTION_N(sample_convolution8, 8, sampler2D, weights8) -SAMPLE_CONVOLUTION_N(sample_convolution12, 12, sampler2D, weights12) -SAMPLE_CONVOLUTION_N(sample_convolution16, 16, sampler2D, weights16) -SAMPLE_CONVOLUTION_N(sample_convolution32, 32, sampler2D, weights32) -SAMPLE_CONVOLUTION_N(sample_convolution64, 64, sampler2D, weights64) +SAMPLE_CONVOLUTION_N(sample_convolution2, 2, weights2) +SAMPLE_CONVOLUTION_N(sample_convolution4, 4, weights4) +SAMPLE_CONVOLUTION_N(sample_convolution6, 6, weights6) +SAMPLE_CONVOLUTION_N(sample_convolution8, 8, weights8) +SAMPLE_CONVOLUTION_N(sample_convolution12, 12, weights12) +SAMPLE_CONVOLUTION_N(sample_convolution16, 16, weights16) +SAMPLE_CONVOLUTION_N(sample_convolution32, 32, weights32) +SAMPLE_CONVOLUTION_N(sample_convolution64, 64, weights64) // Unsharp masking vec4 sample_sharpen3(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param1) { -- cgit v1.2.3