From 26baf5b9da4faaec6f5e39a8efba7e58dd6317ed Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Sun, 4 Jan 2015 23:11:27 +0100 Subject: vo_opengl: add ewa_lanczos upscaler (aka jinc) This is the polar (elliptic weighted average) version of lanczos. This introduces a general new form of polar filters. --- DOCS/man/vo.rst | 8 +++- video/out/filter_kernels.c | 71 ++++++++++++++++++++++++++++++++++ video/out/filter_kernels.h | 3 ++ video/out/gl_video.c | 85 +++++++++++++++++++++++++++-------------- video/out/gl_video_shaders.glsl | 21 ++++++++++ 5 files changed, 158 insertions(+), 30 deletions(-) diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst index 0566fd6817..92b88dc523 100644 --- a/DOCS/man/vo.rst +++ b/DOCS/man/vo.rst @@ -304,6 +304,12 @@ Available video output drivers are: ``lanczos`` Generic Lanczos scaling filter. Set radius with ``lradius``. + ``ewa_lanczos`` + Generic elliptic weighted average Lanczos scaling filter. Also + known as Jinc. The radius can be set with ``lradius`` up to a + maximum value of 16, but note that performance drops very quickly + as the radius increases. + ``spline36`` This is the default when using ``opengl-hq``. @@ -348,7 +354,7 @@ Available video output drivers are: Set radius for filters listed below, must be a float number between 1.0 and 8.0. Defaults to be 2.0 if not specified. 
- ``sinc``, ``lanczos``, ``blackman``, ``gaussian`` + ``sinc``, ``lanczos``, ``ewa_lanczos``, ``blackman``, ``gaussian`` Note that depending on filter implementation details and video scaling ratio, the radius that actually being used might be different diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c index 26f62ff567..4e5ca471bb 100644 --- a/video/out/filter_kernels.c +++ b/video/out/filter_kernels.c @@ -58,6 +58,11 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes, { if (filter->radius < 0) filter->radius = 2.0; + // polar filters can be of any radius, and nothing special is needed + if (filter->polar) { + filter->size = filter->radius; + return true; + } // only downscaling requires widening the filter filter->inv_scale = inv_scale >= 1.0 ? inv_scale : 1.0; double support = filter->radius * filter->inv_scale; @@ -111,6 +116,18 @@ void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array) } } +// Fill the given array with weights for the range [0, R], where R is the +// radius of the filter. The array is interpreted as a one-dimensional array +// of count items. +void mp_compute_lut_polar(struct filter_kernel *filter, int count, float *out_array) +{ + assert(filter->radius > 0); + for (int x = 0; x < count; x++) { + double r = x * filter->radius / (count - 1); + out_array[x] = r <= filter->radius ? 
filter->weight(filter, r) : 0; + } +} + typedef struct filter_kernel kernel; static double nearest(kernel *k, double x) @@ -261,6 +278,14 @@ static double sinc(kernel *k, double x) return sin(pix) / pix; } +static double jinc(kernel *k, double x) +{ + if (x == 0.0) + return 1.0; + double pix = M_PI * x; + return 2.0 * j1(pix) / pix; +} + static double lanczos(kernel *k, double x) { double radius = k->size / 2; @@ -272,6 +297,48 @@ static double lanczos(kernel *k, double x) return radius * sin(pix) * sin(pix / radius) / (pix * pix); } +static double ewa_lanczos(kernel *k, double x) +{ + double radius = k->radius; + assert(radius >= 1.0); + + // This is already three orders of magnitude slower than anything you could + // possibly hope to play back in realtime and results in tons of ringing + // artifacts, so I doubt anybody will complain. + if (radius > 16) + radius = 16; + + if (fabs(x) < 1e-8) + return 1.0; + if (fabs(x) >= radius) + return 0.0; + + // Precomputed zeros of the jinc() function, needed to adjust the + // window size. Computing this at runtime is nontrivial. 
+ // Copied from: https://github.com/AviSynth/jinc-resize/blob/master/JincResize/JincFilter.cpp#L171 + static double jinc_zeros[16] = { + 1.2196698912665045, + 2.2331305943815286, + 3.2383154841662362, + 4.2410628637960699, + 5.2427643768701817, + 6.2439216898644877, + 7.2447598687199570, + 8.2453949139520427, + 9.2458926849494673, + 10.246293348754916, + 11.246622794877883, + 12.246898461138105, + 13.247132522181061, + 14.247333735806849, + 15.247508563037300, + 16.247661874700962 + }; + + double window = jinc_zeros[0] / jinc_zeros[(int)radius - 1]; + return jinc(k, x) * jinc(k, x*window); +} + static double blackman(kernel *k, double x) { double radius = k->size / 2; @@ -303,6 +370,10 @@ const struct filter_kernel mp_filter_kernels[] = { {"sinc3", 3, sinc}, {"sinc4", 4, sinc}, {"sinc", -1, sinc}, + {"ewa_lanczos2", 2, ewa_lanczos, .polar = true}, + {"ewa_lanczos3", 3, ewa_lanczos, .polar = true}, + {"ewa_lanczos4", 4, ewa_lanczos, .polar = true}, + {"ewa_lanczos", -1, ewa_lanczos, .polar = true}, {"lanczos2", 2, lanczos}, {"lanczos3", 3, lanczos}, {"lanczos4", 4, lanczos}, diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h index f9a413b9f7..4b407f4479 100644 --- a/video/out/filter_kernels.h +++ b/video/out/filter_kernels.h @@ -28,6 +28,8 @@ struct filter_kernel { // The filter params can be changed at runtime. Only used by some filters. float params[2]; + // Whether or not the filter uses polar coordinates + bool polar; // The following values are set by mp_init_filter() at runtime. // Number of coefficients; equals the rounded up radius multiplied with 2. 
int size; @@ -41,5 +43,6 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes, double scale); void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w); void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array); +void mp_compute_lut_polar(struct filter_kernel *filter, int count, float *out_array); #endif /* MPLAYER_FILTER_KERNELS_H */ diff --git a/video/out/gl_video.c b/video/out/gl_video.c index c6077c6ad2..36a34449c9 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -952,23 +952,29 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass) snprintf(name, sizeof(name), "sample_scaler%d", unit); APPENDF(shader, "#define DEF_SCALER%d \\\n ", unit); char lut_fn[40]; - if (size == 2 || size == 6) { - snprintf(lut_fn, sizeof(lut_fn), "weights%d", size); + if (scaler->kernel->polar) { + // SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) + APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s)\n", + name, (int)scaler->kernel->radius, lut_tex); } else { - snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit); - APPENDF(shader, "WEIGHTS_N(%s, %d) \\\n ", lut_fn, size); - } - if (pass != -1) { - // The direction/pass assignment is rather arbitrary, but fixed in - // other parts of the code (like FBO setup). - const char *direction = pass == 0 ? 
"0, 1" : "1, 0"; - // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC) - APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n", - name, direction, size, lut_tex, lut_fn); - } else { - // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC) - APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n", - name, size, lut_tex, lut_fn); + if (size == 2 || size == 6) { + snprintf(lut_fn, sizeof(lut_fn), "weights%d", size); + } else { + snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit); + APPENDF(shader, "WEIGHTS_N(%s, %d) \\\n ", lut_fn, size); + } + if (pass != -1) { + // The direction/pass assignment is rather arbitrary, but fixed in + // other parts of the code (like FBO setup). + const char *direction = pass == 0 ? "0, 1" : "1, 0"; + // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC) + APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n", + name, direction, size, lut_tex, lut_fn); + } else { + // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC) + APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n", + name, size, lut_tex, lut_fn); + } } APPENDF(shader, "#define %s %s\n", target, name); } @@ -1163,7 +1169,7 @@ static void compile_shaders(struct gl_video *p) shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0); shader_def_opt(&header_final, "USE_TEMPORAL_DITHER", p->opts.temporal_dither); - if (p->opts.scale_sep && p->scalers[0].kernel) { + if (p->opts.scale_sep && p->scalers[0].kernel && !p->scalers[0].kernel->polar) { header_sep = talloc_strdup(tmp, ""); shader_def_opt(&header_sep, "FIXED_SCALE", true); shader_setup_scaler(&header_sep, &p->scalers[0], 0); @@ -1312,32 +1318,53 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler) int size = scaler->kernel->size; int elems_per_pixel = 4; - if (size == 2) { + if (scaler->kernel->polar) { + elems_per_pixel = 1; + } else if (size == 2) { elems_per_pixel = 2; } else if (size == 6) { elems_per_pixel = 3; } int width = size / 
elems_per_pixel; const struct fmt_entry *fmt = &gl_float16_formats[elems_per_pixel - 1]; - scaler->lut_name = scaler->index == 0 ? "lut_l" : "lut_c"; + if (scaler->kernel->polar) { + scaler->lut_name = scaler->index == 0 ? "lut_polar_l" : "lut_polar_c"; + } else { + scaler->lut_name = scaler->index == 0 ? "lut_l" : "lut_c"; + } gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index); if (!scaler->gl_lut) gl->GenTextures(1, &scaler->gl_lut); - gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut); + if (scaler->kernel->polar) { + gl->BindTexture(GL_TEXTURE_1D, scaler->gl_lut); - float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size); - mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); - gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, width, - LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights); - talloc_free(weights); + float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE); + mp_compute_lut_polar(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); + gl->TexImage1D(GL_TEXTURE_1D, 0, fmt->internal_format, LOOKUP_TEXTURE_SIZE, + 0, fmt->format, GL_FLOAT, weights); + talloc_free(weights); + + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + } else { + gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut); + + float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size); + mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, width, + LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights); + talloc_free(weights); + + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + 
} - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gl->ActiveTexture(GL_TEXTURE0); diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index 4037e42449..dac19cb673 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -168,6 +168,8 @@ uniform vec2 chroma_center_offset; uniform vec2 chroma_div; uniform sampler2D lut_c; uniform sampler2D lut_l; +uniform sampler1D lut_polar_c; +uniform sampler1D lut_polar_l; #if HAVE_3DTEX uniform sampler3D lut_3d; #endif @@ -297,6 +299,25 @@ float[6] weights6(sampler2D lookup, float f) { return res; \ } + +#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) \ + vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \ + vec2 pt = vec2(1.0) / texsize; \ + vec2 fcoord = fract(texcoord * texsize - vec2(0.5)); \ + vec2 base = texcoord - fcoord * pt; \ + vec4 res = vec4(0); \ + float wsum = 0; \ + for (int y = 1-R; y <= R; y++) { \ + for (int x = 1-R; x <= R; x++) { \ + vec2 d = vec2(x,y) - fcoord; \ + float w = texture1D(LUT, sqrt(d.x*d.x + d.y*d.y)/R).r; \ + wsum += w; \ + res += w * texture(tex, base + pt * vec2(x, y)); \ + } \ + } \ + return res / wsum; \ + } + #ifdef DEF_SCALER0 DEF_SCALER0 #endif -- cgit v1.2.3