summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@nand.wakku.to>2015-01-18 17:41:49 +0100
committerNiklas Haas <git@nand.wakku.to>2015-01-22 19:29:23 +0100
commit6c250505fedc54a3918788f70445f5fff9d2569a (patch)
tree001d8de845975c6d24e345925a4515ec655e00ff
parentf5e48f023524630d0334b1fbc2f2dc44bbc2819b (diff)
downloadmpv-6c250505fedc54a3918788f70445f5fff9d2569a.tar.bz2
mpv-6c250505fedc54a3918788f70445f5fff9d2569a.tar.xz
vo_opengl: unroll ewa_lanczos to avoid looping and unnecessary samples
This improves performance by roughly 10%, since it omits unnecessary checks. It will also make adding anti-ringing easier.
-rw-r--r--video/out/gl_video.c24
-rw-r--r--video/out/gl_video_shaders.glsl15
2 files changed, 29 insertions, 10 deletions
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index c58521ed49..7400ffba0f 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -954,9 +954,29 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
APPENDF(shader, "#define DEF_SCALER%d \\\n ", unit);
char lut_fn[40];
if (scaler->kernel->polar) {
+ int radius = (int)scaler->kernel->radius;
// SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT)
- APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s)\n",
- name, (int)scaler->kernel->radius, lut_tex);
+ APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s, WEIGHTS%d)\n",
+ name, radius, lut_tex, unit);
+
+ // Pre-compute unrolled weights matrix
+ APPENDF(shader, "#define WEIGHTS%d(LUT) \\\n ", unit);
+ for (int y = 1-radius; y <= radius; y++) {
+ for (int x = 1-radius; x <= radius; x++) {
+ // Since we can't know the subpixel position in advance,
+ // assume a worst case scenario.
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double d = sqrt(xx*xx + yy*yy);
+
+ // Samples outside the radius are unnecessary
+ if (d < radius) {
+ APPENDF(shader, "SAMPLE_POLAR(LUT, %f, %d, %d) \\\n ",
+ (double)radius, x, y);
+ }
+ }
+ }
+ APPENDF(shader, "\n");
} else {
if (size == 2 || size == 6) {
snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index 1a489835cc..fa9bfa2e95 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -298,21 +298,20 @@ float[6] weights6(sampler2D lookup, float f) {
return res; \
}
+#define SAMPLE_POLAR(LUT, R, X, Y) \
+ w = texture1D(LUT, length(vec2(X, Y) - fcoord)/R).r; \
+ wsum += w; \
+ res += w * texture(tex, base + pt * vec2(X, Y)); \
-#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) \
+#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT, WEIGHTS_FN) \
vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \
vec2 pt = vec2(1.0) / texsize; \
vec2 fcoord = fract(texcoord * texsize - vec2(0.5)); \
vec2 base = texcoord - fcoord * pt; \
vec4 res = vec4(0); \
float wsum = 0; \
- for (int y = 1-R; y <= R; y++) { \
- for (int x = 1-R; x <= R; x++) { \
- float w = texture1D(LUT, length(vec2(x,y) - fcoord)/R).r; \
- wsum += w; \
- res += w * texture(tex, base + pt * vec2(x, y)); \
- } \
- } \
+ float w; \
+ WEIGHTS_FN(LUT); \
return res / wsum; \
}