From f5e48f023524630d0334b1fbc2f2dc44bbc2819b Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Sat, 17 Jan 2015 17:28:47 +0100 Subject: vo_opengl: clean up ewa_lanczos code This fixes compatibility with GLES 2.0 and makes the code a bit neater in general. It also properly forces indirect scaling for subsampled video regardless of the lscale setting. --- video/out/filter_kernels.c | 11 ++++---- video/out/filter_kernels.h | 4 +-- video/out/gl_common.c | 3 ++- video/out/gl_common.h | 5 ++-- video/out/gl_video.c | 58 +++++++++++++++++++++++++++++------------ video/out/gl_video_shaders.glsl | 11 +++++--- 6 files changed, 60 insertions(+), 32 deletions(-) diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c index f2c97b4bde..4faeb0b4b8 100644 --- a/video/out/filter_kernels.c +++ b/video/out/filter_kernels.c @@ -58,10 +58,9 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes, { if (filter->radius < 0) filter->radius = 3.0; - // polar filters can be of any radius, and nothing special is needed + // polar filters are dependent only on the radius if (filter->polar) { - filter->size = filter->radius; - filter->num_coefficients = 1; + filter->size = 1; return true; } // only downscaling requires widening the filter @@ -76,14 +75,12 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes, cursize++; if (*cursize) { filter->size = *cursize; - filter->num_coefficients = filter->size; return true; } else { // The filter doesn't fit - instead of failing completely, use the // largest filter available. This is incorrect, but better than refusing // to do anything. filter->size = cursize[-1]; - filter->num_coefficients = filter->size; filter->inv_scale = filter->size / 2.0 / filter->radius; return false; } @@ -110,16 +107,18 @@ void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w) } // Fill the given array with weights for the range [0.0, 1.0]. The array is -// interpreted as rectangular array of count * filter->num_coefficients items. +// interpreted as rectangular array of count * filter->size items. void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array) { if (filter->polar) { + // Compute a 1D array indexed by radius assert(filter->radius > 0); for (int x = 0; x < count; x++) { double r = x * filter->radius / (count - 1); out_array[x] = r <= filter->radius ? filter->weight(filter, r) : 0; } } else { + // Compute a 2D array indexed by subpixel position for (int n = 0; n < count; n++) { mp_compute_weights(filter, n / (double)(count - 1), out_array + filter->size * n); diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h index c1d68e0c5b..3b12fcfe57 100644 --- a/video/out/filter_kernels.h +++ b/video/out/filter_kernels.h @@ -31,9 +31,8 @@ struct filter_kernel { // Whether or not the filter uses polar coordinates bool polar; // The following values are set by mp_init_filter() at runtime. - int size; // Number of coefficients; equals the rounded up radius multiplied with 2. - int num_coefficients; + int size; double inv_scale; }; @@ -44,6 +43,5 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes, double scale); void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w); void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array); -void mp_compute_lut_polar(struct filter_kernel *filter, int count, float *out_array); #endif /* MPLAYER_FILTER_KERNELS_H */ diff --git a/video/out/gl_common.c b/video/out/gl_common.c index 8bb570684a..70fb42b973 100644 --- a/video/out/gl_common.c +++ b/video/out/gl_common.c @@ -96,6 +96,7 @@ static const struct feature features[] = { {MPGL_CAP_FLOAT_TEX, "Float textures"}, {MPGL_CAP_TEX_RG, "RG textures"}, {MPGL_CAP_1ST_CLASS_ARRAYS, "1st class shader arrays"}, + {MPGL_CAP_1D_TEX, "1D textures"}, {MPGL_CAP_3D_TEX, "3D textures"}, {MPGL_CAP_DEBUG, "debugging extensions"}, {MPGL_CAP_SW, "suspected software renderer"}, @@ -207,7 +208,7 @@ static const struct gl_functions gl_functions[] = { // GL 2.1+ desktop only (and GLSL 120 shaders) { .ver_core = 210, - .provides = MPGL_CAP_ROW_LENGTH | MPGL_CAP_3D_TEX | + .provides = MPGL_CAP_ROW_LENGTH | MPGL_CAP_1D_TEX | MPGL_CAP_3D_TEX | MPGL_CAP_1ST_CLASS_ARRAYS, .functions = (const struct gl_function[]) { DEF_FN(DrawBuffer), diff --git a/video/out/gl_common.h b/video/out/gl_common.h index 24c6091ade..dcb6a86ced 100644 --- a/video/out/gl_common.h +++ b/video/out/gl_common.h @@ -73,8 +73,9 @@ enum { MPGL_CAP_VDPAU = (1 << 11), // GL_NV_vdpau_interop MPGL_CAP_APPLE_RGB_422 = (1 << 12), // GL_APPLE_rgb_422 MPGL_CAP_1ST_CLASS_ARRAYS = (1 << 13), - MPGL_CAP_3D_TEX = (1 << 14), - MPGL_CAP_DEBUG = (1 << 15), + MPGL_CAP_1D_TEX = (1 << 14), + MPGL_CAP_3D_TEX = (1 << 15), + MPGL_CAP_DEBUG = (1 << 16), MPGL_CAP_SW = (1 << 30), // indirect or sw renderer }; diff --git a/video/out/gl_video.c b/video/out/gl_video.c index f16c2e485b..c58521ed49 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -1006,16 +1006,18 @@ static void compile_shaders(struct gl_video *p) char *s_video = get_section(tmp, src, "frag_video"); bool rg = gl->mpgl_caps & MPGL_CAP_TEX_RG; + bool tex1d = gl->mpgl_caps & MPGL_CAP_1D_TEX; bool tex3d = gl->mpgl_caps & MPGL_CAP_3D_TEX; bool arrays = gl->mpgl_caps & MPGL_CAP_1ST_CLASS_ARRAYS; char *header = talloc_asprintf(tmp, "#version %d%s\n" "#define HAVE_RG %d\n" + "#define HAVE_1DTEX %d\n" "#define HAVE_3DTEX %d\n" "#define HAVE_ARRAYS %d\n" "%s%s", gl->glsl_version, gl->es >= 300 ? " es" : "", - rg, tex3d, arrays, shader_prelude, PRELUDE_END); + rg, tex1d, tex3d, arrays, shader_prelude, PRELUDE_END); bool use_cms = p->opts.srgb || p->use_lut_3d; @@ -1185,7 +1187,7 @@ static void compile_shaders(struct gl_video *p) // has to fetch the coefficients for each texture separately, even though // they're the same (this is not an inherent restriction, but would require // to restructure the shader). - if (header_sep && p->plane_count > 1) + if (p->opts.scale_sep && p->plane_count > 1) use_indirect = true; if (input_is_subsampled(p)) { @@ -1302,7 +1304,7 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler) update_scale_factor(p, scaler); - int size = scaler->kernel->num_coefficients; + int size = scaler->kernel->size; int elems_per_pixel = 4; if (size == 1) { elems_per_pixel = 1; @@ -1314,25 +1316,41 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler) int width = size / elems_per_pixel; assert(size == width * elems_per_pixel); const struct fmt_entry *fmt = &gl_float16_formats[elems_per_pixel - 1]; - scaler->lut_name = scaler->index == 0 ? "lut_l" : "lut_c"; + int target; + + if (scaler->kernel->polar) { + target = GL_TEXTURE_1D; + scaler->lut_name = scaler->index == 0 ? "lut_1d_l" : "lut_1d_c"; + } else { + target = GL_TEXTURE_2D; + scaler->lut_name = scaler->index == 0 ? "lut_2d_l" : "lut_2d_c"; + } gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index); if (!scaler->gl_lut) gl->GenTextures(1, &scaler->gl_lut); - gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut); + gl->BindTexture(target, scaler->gl_lut); float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size); mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); - gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, width, - LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights); + + if (target == GL_TEXTURE_1D) { + gl->TexImage1D(target, 0, fmt->internal_format, LOOKUP_TEXTURE_SIZE, + 0, fmt->format, GL_FLOAT, weights); + } else { + gl->TexImage2D(target, 0, fmt->internal_format, width, LOOKUP_TEXTURE_SIZE, + 0, fmt->format, GL_FLOAT, weights); + } + talloc_free(weights); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + if (target != GL_TEXTURE_1D) + gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gl->ActiveTexture(GL_TEXTURE0); @@ -2126,6 +2144,7 @@ static void check_gl_features(struct gl_video *p) bool have_fbo = gl->mpgl_caps & MPGL_CAP_FB; bool have_srgb = gl->mpgl_caps & MPGL_CAP_SRGB_TEX; bool have_arrays = gl->mpgl_caps & MPGL_CAP_1ST_CLASS_ARRAYS; + bool have_1d_tex = gl->mpgl_caps & MPGL_CAP_1D_TEX; bool have_3d_tex = gl->mpgl_caps & MPGL_CAP_3D_TEX; bool have_mix = gl->glsl_version >= 130; @@ -2144,16 +2163,23 @@ static void check_gl_features(struct gl_video *p) // because they will be slow (not critically slow, but still slower). // Without FP textures, we must always disable them. // I don't know if luminance alpha float textures exist, so disregard them. - if (!have_float_tex || !have_arrays || (!have_fbo && p->opts.scale_sep)) { + if (!have_float_tex || !have_arrays || !have_fbo || !have_1d_tex) { for (int n = 0; n < 2; n++) { - if (mp_find_filter_kernel(p->opts.scalers[n])) { - p->opts.scalers[n] = "bilinear"; - char *reason = "scaler (FBO)"; + const struct filter_kernel *kernel = mp_find_filter_kernel(p->opts.scalers[n]); + if (kernel) { + char *reason = ""; + if (!have_fbo) + reason = "scaler (FBO)"; if (!have_float_tex) reason = "scaler (float tex.)"; if (!have_arrays) reason = "scaler (no GLSL support)"; - disabled[n_disabled++] = reason; + if (!have_1d_tex && kernel->polar) + reason = "scaler (1D tex.)"; + if (*reason) { + p->opts.scalers[n] = "bilinear"; + disabled[n_disabled++] = reason; + } } } } diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index 51c444aa2e..1a489835cc 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -163,8 +163,12 @@ uniform VIDEO_SAMPLER texture3; uniform vec2 textures_size[4]; uniform vec2 chroma_center_offset; uniform vec2 chroma_div; -uniform sampler2D lut_c; -uniform sampler2D lut_l; +uniform sampler2D lut_2d_c; +uniform sampler2D lut_2d_l; +#if HAVE_1DTEX +uniform sampler1D lut_1d_c; +uniform sampler1D lut_1d_l; +#endif #if HAVE_3DTEX uniform sampler3D lut_3d; #endif @@ -304,8 +308,7 @@ float[6] weights6(sampler2D lookup, float f) { float wsum = 0; \ for (int y = 1-R; y <= R; y++) { \ for (int x = 1-R; x <= R; x++) { \ - vec2 d = vec2(x,y) - fcoord; \ - float w = texture(LUT, vec2(0.5, length(d) / R)).r; \ + float w = texture1D(LUT, length(vec2(x,y) - fcoord)/R).r; \ wsum += w; \ res += w * texture(tex, base + pt * vec2(x, y)); \ } \ -- cgit v1.2.3