6 files changed, 32 insertions, 9 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index a7ec35b3ae..94a153408a 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4004,6 +4004,14 @@ The following video options are currently all specific to ``--vo=opengl`` and
     fringes of black, mostly around moving edges) in exchange for potentially
     adding more blur.
 
+``--scale-cutoff=<value>``, ``--cscale-cutoff=<value>``, ``--dscale-cutoff=<value>``
+    Cut off the filter kernel past the last radius at which its absolute weight
+    exceeds this threshold (0 to 1). This allows more aggressive pruning of
+    skippable coefficients by disregarding parts of the LUT which are
+    effectively zeroed out by the window function. Only affects polar (EWA)
+    filters. The default is 0.001 for each, which is perceptually transparent
+    but provides a 10%-20% speedup, depending on the radius and kernel chosen.
+
 ``--scale-taper=<value>``, ``--scale-wtaper=<value>``, ``--dscale-taper=<value>``, ``--dscale-wtaper=<value>``, ``--cscale-taper=<value>``, ``--cscale-wtaper=<value>``, ``--tscale-taper=<value>``, ``--tscale-wtaper=<value>``
     Kernel/window taper factor. Increasing this flattens the filter function.
     Value range is 0 to 1. A value of 0 (the default) means no flattening, a
diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c
index 11680a064a..09f85af5f4 100644
--- a/video/out/filter_kernels.c
+++ b/video/out/filter_kernels.c
@@ -152,10 +152,14 @@ static void mp_compute_weights(struct filter_kernel *filter, double f,
 void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
 {
     if (filter->polar) {
+        filter->radius_cutoff = 0.0;
         // Compute a 1D array indexed by radius
         for (int x = 0; x < count; x++) {
             double r = x * filter->f.radius / (count - 1);
             out_array[x] = sample_filter(filter, r);
+
+            if (fabs(out_array[x]) > filter->value_cutoff)
+                filter->radius_cutoff = r;
         }
     } else {
         // Compute a 2D array indexed by subpixel position
diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h
index 74cc3eb148..c9a89f6847 100644
--- a/video/out/filter_kernels.h
+++ b/video/out/filter_kernels.h
@@ -29,6 +29,7 @@ struct filter_kernel {
     struct filter_window f; // the kernel itself
     struct filter_window w; // window storage
     bool clamp; // clamp to the range [0-1]
+    double value_cutoff; // discard all contributions below this value (polar)
     // Constant values
     const char *window; // default window
     bool polar;         // whether or not the filter uses polar coordinates
@@ -38,6 +39,7 @@ struct filter_kernel {
                           // function radius to the possibly wider
                           // (in the case of downsampling) filter sample
                           // radius.
+    double radius_cutoff; // the true radius at which we can cut off the filter
 };
 
 extern const struct filter_window mp_filter_windows[];
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 41ddf84722..d0d80e94c9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -294,10 +294,13 @@ static const struct gl_video_opts gl_video_opts_def = {
     .sigmoid_center = 0.75,
     .sigmoid_slope = 6.5,
     .scaler = {
-        {{"bilinear",   .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale
-        {{NULL,         .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale
-        {{"bilinear",   .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale
-        {{"mitchell",   .params={NAN, NAN}}, {.params = {NAN, NAN}},
+        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // scale
+        {{NULL,       .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // dscale
+        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+         .cutoff = 0.001}, // cscale
+        {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
          .clamp = 1, }, // tscale
     },
     .scaler_resizes_only = 1,
@@ -324,6 +327,7 @@ static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
     OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0),                  \
     OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0),                  \
     OPT_FLOAT(n"-blur",   scaler[i].kernel.blur, 0),                       \
+    OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0),             \
     OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0),        \
     OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0),                  \
     OPT_FLOAT(n"-wblur",  scaler[i].window.blur, 0),                       \
@@ -1437,6 +1441,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
         scaler->kernel->f.radius = conf->radius;
 
     scaler->kernel->clamp = conf->clamp;
+    scaler->kernel->value_cutoff = conf->cutoff;
 
     scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
 
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 6c41e70b1e..4a51d421ad 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -43,6 +43,7 @@ struct scaler_config {
     struct scaler_fun window;
     float radius;
     float antiring;
+    float cutoff;
     int clamp;
 };
 
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 9e54d33d41..9ed85ffa09 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -108,8 +108,10 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
 void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
 {
     double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
-    int bound = ceil(radius);
+    double radius_cutoff = scaler->kernel->radius_cutoff;
+    int bound = ceil(radius_cutoff);
     bool use_ar = scaler->conf.antiring > 0;
+
     GLSL(color = vec4(0.0);)
     GLSLF("{\n");
     GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
@@ -130,12 +132,13 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
             int xx = x > 0 ? x-1 : x;
             double dmax = sqrt(xx*xx + yy*yy);
             // Skip samples definitely outside the radius
-            if (dmax >= radius)
+            if (dmax >= radius_cutoff)
                 continue;
             GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
             // Check for samples that might be skippable
-            if (dmax >= radius - M_SQRT2)
-                GLSLF("if (d < 1.0) {\n");
+            bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+            if (maybe_skippable)
+                GLSLF("if (d < %f) {\n", radius_cutoff / radius);
             if (scaler->gl_target == GL_TEXTURE_1D) {
                 GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
                       scaler->lut_size);
@@ -150,7 +153,7 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
                 GLSL(lo = min(lo, c);)
                 GLSL(hi = max(hi, c);)
             }
-            if (dmax >= radius - M_SQRT2)
+            if (maybe_skippable)
                 GLSLF("}\n");
         }
     }