From 9c484cb080330d131e5a0b0049492f3583f463ff Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 8 Dec 2014 16:04:08 +0100 Subject: vo_opengl: refactor: instantiate scaler functions at runtime Before this commit, the convolution scaler shader functions were pre-instantiated in the shader file. For every filter size, a corresponding function (with the filter size as suffix) had to be present. Change this, and make the C code emit the necessary bits. This means the shader code is much reduced. (Although hopefully it doesn't make shader compilation faster - it would require a really dumb compiler if it spends its time on dead code.) It also makes it more flexible, which is the main goal. The DEF_SCALER0 stuff is needed because the C code writes the header of the shader, at a point where scaler macros are not defined yet. --- video/out/gl_video.c | 36 +++++++++++++++++++++-------- video/out/gl_video_shaders.glsl | 51 ++++++++++++++--------------------------- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/video/out/gl_video.c b/video/out/gl_video.c index 124d949a03..cb32600395 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -835,27 +835,43 @@ static void shader_def_opt(char **shader, const char *name, bool b) shader_def(shader, name, "1"); } +#define APPENDF(s_ptr, ...) \ + *(s_ptr) = talloc_asprintf_append(*(s_ptr), __VA_ARGS__) + static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass) { - const char *target = scaler->index == 0 ? "SAMPLE_L" : "SAMPLE_C"; + int unit = scaler->index; + const char *target = unit == 0 ? 
"SAMPLE_L" : "SAMPLE_C"; if (!scaler->kernel) { - *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) " - "sample_%s(p0, p1, p2, filter_param1_%c)\n", - target, scaler->name, "lc"[scaler->index]); + APPENDF(shader, "#define %s(p0, p1, p2) " + "sample_%s(p0, p1, p2, filter_param1_%c)\n", + target, scaler->name, "lc"[unit]); } else { int size = scaler->kernel->size; + const char *lut_tex = scaler->lut_name; + char name[40]; + snprintf(name, sizeof(name), "sample_scaler%d", unit); + APPENDF(shader, "#define DEF_SCALER%d \\\n", unit); + char lut_fn[40]; + if (size < 8) { + snprintf(lut_fn, sizeof(lut_fn), "weights%d", size); + } else { + snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit); + APPENDF(shader, " WEIGHTS_N(%s, %d) \\\n ", lut_fn, size); + } if (pass != -1) { // The direction/pass assignment is rather arbitrary, but fixed in // other parts of the code (like FBO setup). const char *direction = pass == 0 ? "0, 1" : "1, 0"; - *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) " - "sample_convolution_sep%d(vec2(%s), %s, p0, p1, p2)\n", - target, size, direction, scaler->lut_name); + // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC) + APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n", + name, direction, size, lut_tex, lut_fn); } else { - *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) " - "sample_convolution%d(%s, p0, p1, p2)\n", - target, size, scaler->lut_name); + // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC) + APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n", + name, size, lut_tex, lut_fn); } + APPENDF(shader, "#define %s %s\n", target, name); } } diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index b8e30e175f..5a32fa1222 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -235,6 +235,7 @@ float[6] weights6(sampler2D lookup, float f) { return float[6](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b); } +// 
For N=n*4 with n>1 (N==4 is covered by weights4()). #define WEIGHTS_N(NAME, N) \ float[N] NAME(sampler2D lookup, float f) { \ float r[N]; \ @@ -249,21 +250,14 @@ float[6] weights6(sampler2D lookup, float f) { return r; \ } -WEIGHTS_N(weights8, 8) -WEIGHTS_N(weights12, 12) -WEIGHTS_N(weights16, 16) -WEIGHTS_N(weights32, 32) -WEIGHTS_N(weights64, 64) - -// The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to +// The DIR parameter is (0, 1) or (1, 0), and we expect the shader compiler to // remove all the redundant multiplications and additions. -#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, WEIGHTS_FUNC) \ - vec4 NAME(vec2 dir, sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize, \ - vec2 texcoord) { \ - vec2 pt = (1 / texsize) * dir; \ - float fcoord = dot(fract(texcoord * texsize - 0.5), dir); \ +#define SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC) \ + vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \ + vec2 pt = (1 / texsize) * DIR; \ + float fcoord = dot(fract(texcoord * texsize - 0.5), DIR); \ vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1); \ - float weights[N] = WEIGHTS_FUNC(lookup, fcoord); \ + float weights[N] = WEIGHTS_FUNC(LUT, fcoord); \ vec4 res = vec4(0); \ for (int n = 0; n < N; n++) { \ res += weights[n] * texture(tex, base + pt * n); \ @@ -271,23 +265,14 @@ WEIGHTS_N(weights64, 64) return res; \ } -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, weights2) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, weights4) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, weights6) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, weights8) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, weights12) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, weights16) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep32, 32, weights32) -SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64) - -#define SAMPLE_CONVOLUTION_N(NAME, N, WEIGHTS_FUNC) \ - vec4 NAME(sampler2D lookup, 
VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {\ +#define SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC) \ + vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \ vec2 pt = 1 / texsize; \ vec2 fcoord = fract(texcoord * texsize - 0.5); \ vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1); \ vec4 res = vec4(0); \ - float w_x[N] = WEIGHTS_FUNC(lookup, fcoord.x); \ - float w_y[N] = WEIGHTS_FUNC(lookup, fcoord.y); \ + float w_x[N] = WEIGHTS_FUNC(LUT, fcoord.x); \ + float w_y[N] = WEIGHTS_FUNC(LUT, fcoord.y); \ for (int y = 0; y < N; y++) { \ vec4 line = vec4(0); \ for (int x = 0; x < N; x++) \ @@ -297,14 +282,12 @@ SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64) return res; \ } -SAMPLE_CONVOLUTION_N(sample_convolution2, 2, weights2) -SAMPLE_CONVOLUTION_N(sample_convolution4, 4, weights4) -SAMPLE_CONVOLUTION_N(sample_convolution6, 6, weights6) -SAMPLE_CONVOLUTION_N(sample_convolution8, 8, weights8) -SAMPLE_CONVOLUTION_N(sample_convolution12, 12, weights12) -SAMPLE_CONVOLUTION_N(sample_convolution16, 16, weights16) -SAMPLE_CONVOLUTION_N(sample_convolution32, 32, weights32) -SAMPLE_CONVOLUTION_N(sample_convolution64, 64, weights64) +#ifdef DEF_SCALER0 +DEF_SCALER0 +#endif +#ifdef DEF_SCALER1 +DEF_SCALER1 +#endif // Unsharp masking vec4 sample_sharpen3(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param1) { -- cgit v1.2.3