filter_kernels: add radius cutoff functionality

This allows filter functions to be cut off early once their contributions become insignificant. This avoids wasting GPU time on sampling from parts of the function that are essentially reduced to zero by the window function, providing anywhere from a 10% to 20% speedup (5700μs -> 4700μs for me).
author: Niklas Haas <git@haasn.xyz> 2017-07-03 11:23:48 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-07-03 11:51:37 +0200
commit: 8854a2bef61090fdcc6b815112ddd966ef07d771 (patch)
tree: 553ec82e7df0eeff9aa980c00f6ad5c14b385518 /video/out/opengl
parent: 41b3b116695bcad3fdb3129b133e2a97d9a76847 (diff)
download: mpv-8854a2bef61090fdcc6b815112ddd966ef07d771.tar.bz2
mpv-8854a2bef61090fdcc6b815112ddd966ef07d771.tar.xz
3 files changed, 18 insertions, 9 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 41ddf84722..d0d80e94c9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -294,10 +294,13 @@ static const struct gl_video_opts gl_video_opts_def = {
     .sigmoid_center = 0.75,
     .sigmoid_slope = 6.5,
     .scaler = {
-        {{"bilinear",   .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale
-        {{NULL,         .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale
-        {{"bilinear",   .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale
-        {{"mitchell",   .params={NAN, NAN}}, {.params = {NAN, NAN}},
+        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // scale
+        {{NULL,       .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // dscale
+        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // cscale
+        {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
          .clamp = 1, }, // tscale
     },
     .scaler_resizes_only = 1,
@@ -324,6 +327,7 @@ static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
     OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0),                  \
     OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0),                  \
     OPT_FLOAT(n"-blur",   scaler[i].kernel.blur, 0),                       \
+    OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0),             \
     OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0),        \
     OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0),                  \
     OPT_FLOAT(n"-wblur",  scaler[i].window.blur, 0),                       \
@@ -1437,6 +1441,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
         scaler->kernel->f.radius = conf->radius;
 
     scaler->kernel->clamp = conf->clamp;
+    scaler->kernel->value_cutoff = conf->cutoff;
 
     scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
 
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 6c41e70b1e..4a51d421ad 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -43,6 +43,7 @@ struct scaler_config {
     struct scaler_fun window;
     float radius;
     float antiring;
+    float cutoff;
     int clamp;
 };
 
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 9e54d33d41..9ed85ffa09 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -108,8 +108,10 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
 void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
 {
     double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
-    int bound = ceil(radius);
+    double radius_cutoff = scaler->kernel->radius_cutoff;
+    int bound = ceil(radius_cutoff);
     bool use_ar = scaler->conf.antiring > 0;
+
     GLSL(color = vec4(0.0);)
     GLSLF("{\n");
     GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
@@ -130,12 +132,13 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
             int xx = x > 0 ? x-1 : x;
             double dmax = sqrt(xx*xx + yy*yy);
             // Skip samples definitely outside the radius
-            if (dmax >= radius)
+            if (dmax >= radius_cutoff)
                 continue;
             GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
             // Check for samples that might be skippable
-            if (dmax >= radius - M_SQRT2)
-                GLSLF("if (d < 1.0) {\n");
+            bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+            if (maybe_skippable)
+                GLSLF("if (d < %f) {\n", radius_cutoff / radius);
             if (scaler->gl_target == GL_TEXTURE_1D) {
                 GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
                       scaler->lut_size);
@@ -150,7 +153,7 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
                 GLSL(lo = min(lo, c);)
                 GLSL(hi = max(hi, c);)
             }
-            if (dmax >= radius - M_SQRT2)
+            if (maybe_skippable)
                 GLSLF("}\n");
         }
     }
author	Niklas Haas <git@haasn.xyz>	2017-07-03 11:23:48 +0200
committer	Niklas Haas <git@haasn.xyz>	2017-07-03 11:51:37 +0200
commit	8854a2bef61090fdcc6b815112ddd966ef07d771 (patch)
tree	553ec82e7df0eeff9aa980c00f6ad5c14b385518 /video/out/opengl
parent	41b3b116695bcad3fdb3129b133e2a97d9a76847 (diff)
download	mpv-8854a2bef61090fdcc6b815112ddd966ef07d771.tar.bz2 mpv-8854a2bef61090fdcc6b815112ddd966ef07d771.tar.xz