From 0da6a7346ae9c2dde2008f664da990bce15f77ab Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@nand.wakku.to>
Date: Fri, 27 Feb 2015 04:32:22 +0100
Subject: vo_opengl: implement antiringing for tensor scalers

This is based on pretty much the same (somewhat naive) logic right now.
I'm not convinced that the extra logic that e.g. madVR includes is worth
enough to warrant heavily confusing the logic for it.

This shouldn't slow down the logic at all in any sane shader compiler,
and indeed it doesn't on any shader compiler that I tested.

Note that this currently doesn't affect cscale at all, due to the weird
implementation details of that.
---
 DOCS/man/vo.rst                 |  5 ++---
 video/out/gl_video.c            |  6 +++---
 video/out/gl_video_shaders.glsl | 16 ++++++++++++----
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index e1375a8a71..317aea14aa 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -314,8 +314,6 @@ Available video output drivers are:
             controlled with ``scale-radius``. Increasing the radius makes the
             filter sharper but adds more ringing.
 
-            This filter supports antiringing (see ``scale-antiring``).
-
         ``ewa_lanczossharp``
             A slightly sharpened version of ewa_lanczos, preconfigured to use
             an ideal radius and parameter. If your hardware can run it, this is
@@ -370,7 +368,8 @@ Available video output drivers are:
         between 0.0 and 1.0. The default value of 0.0 disables antiringing
         entirely.
 
-        Note that this currently only affects EWA filters (eg. ``ewa_lanczos``).
+        Note that this doesn't affect the special filters ``bilinear``,
+        ``bicubic_fast`` or ``sharpen``.
 
     ``scaler-resizes-only``
         Disable the scaler if the video image is not resized. In that case,
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index ac2b8a8787..d812ad7960 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -866,9 +866,9 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
                 // The direction/pass assignment is rather arbitrary, but fixed in
                 // other parts of the code (like FBO setup).
                 const char *direction = pass == 0 ? "0, 1" : "1, 0";
-                // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)
-                APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n",
-                        name, direction, size, lut_tex, lut_fn);
+                // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC, ANTIRING)
+                APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s, %f)\n",
+                        name, direction, size, lut_tex, lut_fn, scaler->antiring);
             } else {
                 // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)
                 APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n",
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index 74a2b5c3e6..87fb4d04b0 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -269,18 +269,26 @@ float[6] weights6(sampler2D lookup, float f) {
     }
 
 // The DIR parameter is (0, 1) or (1, 0), and we expect the shader compiler to
-// remove all the redundant multiplications and additions.
-#define SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)           \
+// remove all the redundant multiplications and additions, and also to unroll
+// the loop and remove the conditional completely.
+#define SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC, ANTIRING) \
     vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
         vec2 pt = (vec2(1.0) / texsize) * DIR;                              \
         float fcoord = dot(fract(texcoord * texsize - vec2(0.5)), DIR);     \
         vec2 base = texcoord - fcoord * pt - pt * vec2(N / 2 - 1);          \
         float weights[N] = WEIGHTS_FUNC(LUT, fcoord);                       \
         vec4 res = vec4(0);                                                 \
+        vec4 hi  = vec4(0);                                                 \
+        vec4 lo  = vec4(1);                                                 \
         for (int n = 0; n < N; n++) {                                       \
-            res += vec4(weights[n]) * texture(tex, base + pt * vec2(n));    \
+            vec4 c = texture(tex, base + pt * vec2(n));                     \
+            res += vec4(weights[n]) * c;                                    \
+            if (n == N/2-1 || n == N/2) {                                   \
+                lo = min(lo, c);                                            \
+                hi = max(hi, c);                                            \
+            }                                                               \
         }                                                                   \
-        return res;                                                         \
+        return mix(res, clamp(res, lo, hi), ANTIRING);                      \
     }
 
 #define SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)                    \
-- 
cgit v1.2.3