From 9c484cb080330d131e5a0b0049492f3583f463ff Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 8 Dec 2014 16:04:08 +0100
Subject: vo_opengl: refactor: instantiate scaler functions at runtime

Before this commit, the convolution scaler shader functions were pre-
instantiated in the shader file. For every filter size, a corresponding
function (with the filter size as suffix) had to be present.

Change this, and make the C code emit the necessary bits.

This means the shader code is much reduced. (Hopefully this does not
actually make shader compilation faster, though - a compiler would have
to be really dumb to spend its time on dead code.)

It also makes it more flexible, which is the main goal.

The DEF_SCALER0 stuff is needed because the C code writes the header of
the shader, at a point where scaler macros are not defined yet.
---
 video/out/gl_video.c            | 36 +++++++++++++++++++++--------
 video/out/gl_video_shaders.glsl | 51 ++++++++++++++---------------------------
 2 files changed, 43 insertions(+), 44 deletions(-)

diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 124d949a03..cb32600395 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -835,27 +835,43 @@ static void shader_def_opt(char **shader, const char *name, bool b)
         shader_def(shader, name, "1");
 }
 
+#define APPENDF(s_ptr, ...) \
+    *(s_ptr) = talloc_asprintf_append(*(s_ptr), __VA_ARGS__)
+
 static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
 {
-    const char *target = scaler->index == 0 ? "SAMPLE_L" : "SAMPLE_C";
+    int unit = scaler->index;
+    const char *target = unit == 0 ? "SAMPLE_L" : "SAMPLE_C";
     if (!scaler->kernel) {
-        *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-            "sample_%s(p0, p1, p2, filter_param1_%c)\n",
-            target, scaler->name, "lc"[scaler->index]);
+        APPENDF(shader, "#define %s(p0, p1, p2) "
+                "sample_%s(p0, p1, p2, filter_param1_%c)\n",
+                target, scaler->name, "lc"[unit]);
     } else {
         int size = scaler->kernel->size;
+        const char *lut_tex = scaler->lut_name;
+        char name[40];
+        snprintf(name, sizeof(name), "sample_scaler%d", unit);
+        APPENDF(shader, "#define DEF_SCALER%d \\\n", unit);
+        char lut_fn[40];
+        if (size < 8) {
+            snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
+        } else {
+            snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit);
+            APPENDF(shader, "    WEIGHTS_N(%s, %d) \\\n    ", lut_fn, size);
+        }
         if (pass != -1) {
             // The direction/pass assignment is rather arbitrary, but fixed in
             // other parts of the code (like FBO setup).
             const char *direction = pass == 0 ? "0, 1" : "1, 0";
-            *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-                "sample_convolution_sep%d(vec2(%s), %s, p0, p1, p2)\n",
-                target, size, direction, scaler->lut_name);
+            // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)
+            APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n",
+                    name, direction, size, lut_tex, lut_fn);
         } else {
-            *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-                "sample_convolution%d(%s, p0, p1, p2)\n",
-                target, size, scaler->lut_name);
+            // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)
+            APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n",
+                    name, size, lut_tex, lut_fn);
         }
+        APPENDF(shader, "#define %s %s\n", target, name);
     }
 }
 
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index b8e30e175f..5a32fa1222 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -235,6 +235,7 @@ float[6] weights6(sampler2D lookup, float f) {
     return float[6](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);
 }
 
+// For N=n*4 with n>1 (N==4 is covered by weights4()).
 #define WEIGHTS_N(NAME, N)                          \
     float[N] NAME(sampler2D lookup, float f) {      \
         float r[N];                                 \
@@ -249,21 +250,14 @@ float[6] weights6(sampler2D lookup, float f) {
         return r;                                   \
     }
 
-WEIGHTS_N(weights8, 8)
-WEIGHTS_N(weights12, 12)
-WEIGHTS_N(weights16, 16)
-WEIGHTS_N(weights32, 32)
-WEIGHTS_N(weights64, 64)
-
-// The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to
+// The DIR parameter is (0, 1) or (1, 0), and we expect the shader compiler to
 // remove all the redundant multiplications and additions.
-#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, WEIGHTS_FUNC)                     \
-    vec4 NAME(vec2 dir, sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize,  \
-              vec2 texcoord) {                                              \
-        vec2 pt = (1 / texsize) * dir;                                      \
-        float fcoord = dot(fract(texcoord * texsize - 0.5), dir);           \
+#define SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)           \
+    vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
+        vec2 pt = (1 / texsize) * DIR;                                      \
+        float fcoord = dot(fract(texcoord * texsize - 0.5), DIR);           \
         vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1);              \
-        float weights[N] = WEIGHTS_FUNC(lookup, fcoord);                    \
+        float weights[N] = WEIGHTS_FUNC(LUT, fcoord);                       \
         vec4 res = vec4(0);                                                 \
         for (int n = 0; n < N; n++) {                                       \
             res += weights[n] * texture(tex, base + pt * n);                \
@@ -271,23 +265,14 @@ WEIGHTS_N(weights64, 64)
         return res;                                                         \
     }
 
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, weights2)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, weights4)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, weights6)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, weights8)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, weights12)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, weights16)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep32, 32, weights32)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64)
-
-#define SAMPLE_CONVOLUTION_N(NAME, N, WEIGHTS_FUNC)                         \
-    vec4 NAME(sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {\
+#define SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)                    \
+    vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
         vec2 pt = 1 / texsize;                                              \
         vec2 fcoord = fract(texcoord * texsize - 0.5);                      \
         vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1);              \
         vec4 res = vec4(0);                                                 \
-        float w_x[N] = WEIGHTS_FUNC(lookup, fcoord.x);                      \
-        float w_y[N] = WEIGHTS_FUNC(lookup, fcoord.y);                      \
+        float w_x[N] = WEIGHTS_FUNC(LUT, fcoord.x);                         \
+        float w_y[N] = WEIGHTS_FUNC(LUT, fcoord.y);                         \
         for (int y = 0; y < N; y++) {                                       \
             vec4 line = vec4(0);                                            \
             for (int x = 0; x < N; x++)                                     \
@@ -297,14 +282,12 @@ SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64)
         return res;                                                         \
     }
 
-SAMPLE_CONVOLUTION_N(sample_convolution2, 2, weights2)
-SAMPLE_CONVOLUTION_N(sample_convolution4, 4, weights4)
-SAMPLE_CONVOLUTION_N(sample_convolution6, 6, weights6)
-SAMPLE_CONVOLUTION_N(sample_convolution8, 8, weights8)
-SAMPLE_CONVOLUTION_N(sample_convolution12, 12, weights12)
-SAMPLE_CONVOLUTION_N(sample_convolution16, 16, weights16)
-SAMPLE_CONVOLUTION_N(sample_convolution32, 32, weights32)
-SAMPLE_CONVOLUTION_N(sample_convolution64, 64, weights64)
+#ifdef DEF_SCALER0
+DEF_SCALER0
+#endif
+#ifdef DEF_SCALER1
+DEF_SCALER1
+#endif
 
 // Unsharp masking
 vec4 sample_sharpen3(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param1) {
-- 
cgit v1.2.3