diff options
author | Niklas Haas <git@nand.wakku.to> | 2015-01-18 17:41:49 +0100 |
---|---|---|
committer | Niklas Haas <git@nand.wakku.to> | 2015-01-22 19:29:23 +0100 |
commit | 6c250505fedc54a3918788f70445f5fff9d2569a (patch) | |
tree | 001d8de845975c6d24e345925a4515ec655e00ff /video | |
parent | f5e48f023524630d0334b1fbc2f2dc44bbc2819b (diff) | |
download | mpv-6c250505fedc54a3918788f70445f5fff9d2569a.tar.bz2 mpv-6c250505fedc54a3918788f70445f5fff9d2569a.tar.xz |
vo_opengl: unroll ewa_lanczos to avoid looping and unnecessary samples
This improves performance by roughly 10%,
since it omits unnecessary checks.
This will also make adding anti-ringing easier.
Diffstat (limited to 'video')
-rw-r--r-- | video/out/gl_video.c | 24 | ||||
-rw-r--r-- | video/out/gl_video_shaders.glsl | 15 |
2 files changed, 29 insertions, 10 deletions
diff --git a/video/out/gl_video.c b/video/out/gl_video.c index c58521ed49..7400ffba0f 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -954,9 +954,29 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass) APPENDF(shader, "#define DEF_SCALER%d \\\n ", unit); char lut_fn[40]; if (scaler->kernel->polar) { + int radius = (int)scaler->kernel->radius; // SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) - APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s)\n", - name, (int)scaler->kernel->radius, lut_tex); + APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s, WEIGHTS%d)\n", + name, radius, lut_tex, unit); + + // Pre-compute unrolled weights matrix + APPENDF(shader, "#define WEIGHTS%d(LUT) \\\n ", unit); + for (int y = 1-radius; y <= radius; y++) { + for (int x = 1-radius; x <= radius; x++) { + // Since we can't know the subpixel position in advance, + // assume a worst case scenario. + int yy = y > 0 ? y-1 : y; + int xx = x > 0 ? x-1 : x; + double d = sqrt(xx*xx + yy*yy); + + // Samples outside the radius are unnecessary + if (d < radius) { + APPENDF(shader, "SAMPLE_POLAR(LUT, %f, %d, %d) \\\n ", + (double)radius, x, y); + } + } + } + APPENDF(shader, "\n"); } else { if (size == 2 || size == 6) { snprintf(lut_fn, sizeof(lut_fn), "weights%d", size); diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl index 1a489835cc..fa9bfa2e95 100644 --- a/video/out/gl_video_shaders.glsl +++ b/video/out/gl_video_shaders.glsl @@ -298,21 +298,20 @@ float[6] weights6(sampler2D lookup, float f) { return res; \ } +#define SAMPLE_POLAR(LUT, R, X, Y) \ + w = texture1D(LUT, length(vec2(X, Y) - fcoord)/R).r; \ + wsum += w; \ + res += w * texture(tex, base + pt * vec2(X, Y)); \ -#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) \ +#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT, WEIGHTS_FN) \ vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \ vec2 pt = vec2(1.0) / texsize; \ vec2 fcoord = fract(texcoord * texsize - 
vec2(0.5)); \ vec2 base = texcoord - fcoord * pt; \ vec4 res = vec4(0); \ float wsum = 0; \ - for (int y = 1-R; y <= R; y++) { \ - for (int x = 1-R; x <= R; x++) { \ - float w = texture1D(LUT, length(vec2(x,y) - fcoord)/R).r; \ - wsum += w; \ - res += w * texture(tex, base + pt * vec2(x, y)); \ - } \ - } \ + float w; \ + WEIGHTS_FN(LUT); \ return res / wsum; \ } |