From be167c227b842427f285732a054ea025a630e591 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Mon, 28 Dec 2020 19:38:35 +0100
Subject: vo_gpu: cast bvecN to vecN for mix() on older GLSL

Fixes https://github.com/mpv-player/mpv/issues/8415, among others
---
 video/out/gpu/shader_cache.c  | 20 +++++++++++
 video/out/gpu/shader_cache.h  |  4 +++
 video/out/gpu/video.c         | 27 +++++++-------
 video/out/gpu/video_shaders.c | 83 ++++++++++++++++++++++++-------------------
 4 files changed, 85 insertions(+), 49 deletions(-)

(limited to 'video/out')

diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c
index 5e96de9c99..bf0983f5f0 100644
--- a/video/out/gpu/shader_cache.c
+++ b/video/out/gpu/shader_cache.c
@@ -458,6 +458,26 @@ void gl_sc_blend(struct gl_shader_cache *sc,
     sc->params.blend_dst_alpha = blend_dst_alpha;
 }
 
+// Pick the GLSL type name used to wrap a boolean-vector mix() selector.
+// `dims` is the component count and must be in 1..4 (asserted). Yields the
+// bool/bvecN name when sc->ra->glsl_version >= 130, and the matching
+// float/vecN name otherwise. The result points to a static string literal.
+const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims)
+{
+    // Index 0 is deliberately left empty so `dims` indexes directly.
+    static const char *const bool_types[] = {
+        [1] = "bool", [2] = "bvec2", [3] = "bvec3", [4] = "bvec4",
+    };
+    static const char *const float_types[] = {
+        [1] = "float", [2] = "vec2", [3] = "vec3", [4] = "vec4",
+    };
+
+    assert(dims > 0 && dims < MP_ARRAY_SIZE(bool_types));
+    if (sc->ra->glsl_version >= 130)
+        return bool_types[dims];
+    return float_types[dims];
+}
+
 static const char *vao_glsl_type(const struct ra_renderpass_input *e)
 {
     // pretty dumb... too dumb, but works for us
diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h
index 547c6b6307..3c87513b2b 100644
--- a/video/out/gpu/shader_cache.h
+++ b/video/out/gpu/shader_cache.h
@@ -43,6 +43,10 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
                         bool transpose, float *v);
 void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
                         bool transpose, float *v);
+
+// Return the cast type (bvecN, or vecN if GLSL < 130) for a mix() selector
+const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims);
+
 void gl_sc_blend(struct gl_shader_cache *sc,
                  enum ra_blend blend_src_rgb,
                  enum ra_blend blend_dst_rgb,
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index a99e70270e..fa2f3a28a3 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2341,26 +2341,29 @@ static void pass_convert_yuv(struct gl_video *p)
         // as per the BT.2020 specification, table 4. This is a non-linear
         // transformation because (constant) luminance receives non-equal
         // contributions from the three different channels.
-        GLSLF("// constant luminance conversion\n");
-        GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936),
-                                       vec2(1.9404, 1.7184),
-                                       lessThanEqual(color.br, vec2(0)))
-                        + color.gg;)
+        GLSLF("// constant luminance conversion \n"
+              "color.br = color.br * mix(vec2(1.5816, 0.9936),              \n"
+              "                         vec2(1.9404, 1.7184),               \n"
+              "                         %s(lessThanEqual(color.br, vec2(0))))\n"
+              "          + color.gg;                                        \n",
+              gl_sc_bvec(p->sc, 2));
         // Expand channels to camera-linear light. This shader currently just
         // assumes everything uses the BT.2020 12-bit gamma function, since the
         // difference between 10 and 12-bit is negligible for anything other
         // than 12-bit content.
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
-                             pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993),
-                                 vec3(1.0/0.45)),
-                             lessThanEqual(vec3(0.08145), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),                       \n"
+              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
+              "                    vec3(1.0/0.45)),                             \n"
+              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
+              gl_sc_bvec(p->sc, 3));
         // Calculate the green channel from the expanded RYcB
         // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
         GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
         // Recompress to receive the R'G'B' result, same as other systems
-        GLSL(color.rgb = mix(color.rgb * vec3(4.5),
-                             vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993),
-                             lessThanEqual(vec3(0.0181), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                       \n"
+              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
+              "                %s(lessThanEqual(vec3(0.0181), color.rgb))); \n",
+              gl_sc_bvec(p->sc, 3));
     }
 
     p->components = 3;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index da48929328..7073185eaf 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -354,9 +354,10 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
 
     switch (trc) {
     case MP_CSP_TRC_SRGB:
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92),
-                             pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)),
-                             lessThan(vec3(0.04045), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92),             \n"
+              "                pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n"
+              "                %s(lessThan(vec3(0.04045), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_BT_1886:
         GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
@@ -380,9 +381,10 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
         break;
     case MP_CSP_TRC_PRO_PHOTO:
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0),
-                             pow(color.rgb, vec3(1.8)),
-                             lessThan(vec3(0.03125), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0),              \n"
+              "                pow(color.rgb, vec3(1.8)),               \n"
+              "                %s(lessThan(vec3(0.03125), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_PQ:
         GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2);
@@ -397,16 +399,16 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
     case MP_CSP_TRC_HLG:
         GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
               "                exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
-              "                lessThan(vec3(0.5), color.rgb));\n",
-              HLG_C, HLG_A, HLG_B);
+              "                %s(lessThan(vec3(0.5), color.rgb)));\n",
+              HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3));
         GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG);
         break;
     case MP_CSP_TRC_V_LOG:
         GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
               "    pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
               "              - vec3(%f),                                  \n"
-              "    lessThanEqual(vec3(0.181), color.rgb));                \n",
-              VLOG_D, VLOG_C, VLOG_B);
+              "    %s(lessThanEqual(vec3(0.181), color.rgb)));            \n",
+              VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_S_LOG1:
         GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
@@ -417,8 +419,8 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f),      \n"
               "    (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
               "              - vec3(%f)) * vec3(1.0/%f),                   \n"
-              "    lessThanEqual(vec3(%f), color.rgb));                    \n",
-              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q);
+              "    %s(lessThanEqual(vec3(%f), color.rgb)));                \n",
+              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q);
         break;
     default:
         abort();
@@ -444,10 +446,11 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
 
     switch (trc) {
     case MP_CSP_TRC_SRGB:
-        GLSL(color.rgb = mix(color.rgb * vec3(12.92),
-                             vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))
-                                 - vec3(0.055),
-                             lessThanEqual(vec3(0.0031308), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(12.92),                       \n"
+              "               vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))     \n"
+              "                   - vec3(0.055),                              \n"
+              "               %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_BT_1886:
         GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
@@ -471,9 +474,10 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
         break;
     case MP_CSP_TRC_PRO_PHOTO:
-        GLSL(color.rgb = mix(color.rgb * vec3(16.0),
-                             pow(color.rgb, vec3(1.0/1.8)),
-                             lessThanEqual(vec3(0.001953), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(16.0),                        \n"
+              "                pow(color.rgb, vec3(1.0/1.8)),                 \n"
+              "                %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_PQ:
         GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
@@ -487,15 +491,15 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG);
         GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
               "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
-              "                lessThan(vec3(1.0), color.rgb));\n",
-              HLG_A, HLG_B, HLG_C);
+              "                %s(lessThan(vec3(1.0), color.rgb)));\n",
+              HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_V_LOG:
         GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),   \n"
               "                vec3(%f) * log(color.rgb + vec3(%f))   \n"
               "                    + vec3(%f),                        \n"
-              "                lessThanEqual(vec3(0.01), color.rgb)); \n",
-              VLOG_C / M_LN10, VLOG_B, VLOG_D);
+              "                %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
+              VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_TRC_S_LOG1:
         GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
@@ -505,8 +509,8 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f),                \n"
               "                vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
               "                    + vec3(%f),                                 \n"
-              "                lessThanEqual(vec3(0.0), color.rgb));           \n",
-              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C);
+              "                %s(lessThanEqual(vec3(0.0), color.rgb)));       \n",
+              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3));
         break;
     default:
         abort();
@@ -537,9 +541,10 @@ static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light,
         // This OOTF is defined by encoding the result as 709 and then decoding
         // it as 1886; although this is called 709_1886 we actually use the
         // more precise (by one decimal) values from BT.2020 instead
-        GLSL(color.rgb = mix(color.rgb * vec3(4.5),
-                             vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993),
-                             lessThan(vec3(0.0181), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                  \n"
+              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
+              "                %s(lessThan(vec3(0.0181), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
         break;
     case MP_CSP_LIGHT_SCENE_1_2:
@@ -570,10 +575,11 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh
     }
     case MP_CSP_LIGHT_SCENE_709_1886:
         GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
-                             pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993),
-                                 vec3(1/0.45)),
-                             lessThan(vec3(0.08145), color.rgb));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),               \n"
+              "                pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n"
+              "                    vec3(1/0.45)),                       \n"
+              "                %s(lessThan(vec3(0.08145), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
         break;
     case MP_CSP_LIGHT_SCENE_1_2:
         GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));)
@@ -718,7 +724,8 @@ static void pass_tone_map(struct gl_shader_cache *sc,
               "max(1e-6, sig_peak - 1.0);\n");
         GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n");
         GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)),"
-              "          greaterThan(sig, vec3(j)));\n");
+              "          %s(greaterThan(sig, vec3(j))));\n",
+              gl_sc_bvec(sc, 3));
         GLSLF("}\n");
         break;
 
@@ -751,7 +758,8 @@ static void pass_tone_map(struct gl_shader_cache *sc,
         GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;)
         GLSLF("sig = mix(scale * sig,"
               "          pow(sig / sig_peak, vec3(gamma)),"
-              "          greaterThan(sig, vec3(cutoff)));\n");
+              "          %s(greaterThan(sig, vec3(cutoff))));\n",
+              gl_sc_bvec(sc, 3));
         break;
     }
 
@@ -784,7 +792,8 @@ static void pass_tone_map(struct gl_shader_cache *sc,
               "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) +       \n"
               "          (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) +              \n"
               "          (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum);               \n"
-              "sig = mix(pb, sig_pq.rgb, lessThan(sig_pq.rgb, vec3(ks)));       \n");
+              "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks))));   \n",
+              gl_sc_bvec(sc, 3));
         // Convert back from PQ space to linear light
         GLSLF("sig *= vec3(sig_pq.a);                                           \n"
               "sig = pow(sig, vec3(1.0/%f));                                    \n"
@@ -990,8 +999,8 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
         // the difference is below the given threshold
         GLSLF("avg = average(%f, h);\n", i * opts->range);
         GLSL(diff = abs(color - avg);)
-        GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n",
-              opts->threshold / (i * 16384.0));
+        GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n",
+              gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0));
     }
 
     // Add some random noise to smooth out residual differences
-- 
cgit v1.2.3