From 42a0f4d87b601e1a3c797c31043d3ca777881974 Mon Sep 17 00:00:00 2001
From: Bin Jin <bjin1990@gmail.com>
Date: Wed, 2 Dec 2015 00:28:26 +0000
Subject: vo_opengl: enable NNEDI3 prescaler on OpenGL ES 3.0

It turns out that both UBO and intBitsToFloat() are supported in
OpenGL ES 3.0[1][2]. Enable them so that the NNEDI3 prescaler can be
used in a wider range of backends.

Also fix some implicit int-to-float conversions, which GLSL ES does not
allow, so that the shader actually compiles on GLES.

Tested on Linux desktop (nvidia 358.16) with "es" sub-option.

[1]: https://www.khronos.org/opengles/sdk/docs/man3/html/glGetUniformBlockIndex.xhtml
[2]: https://www.khronos.org/opengles/sdk/docs/manglsl/docbook4/xhtml/intBitsToFloat.xml
---
 video/out/opengl/common.c |  1 +
 video/out/opengl/nnedi3.c | 20 ++++++++++----------
 video/out/opengl/video.c  | 12 ++++++++----
 3 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'video')

diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index ae4384ec30..a93ecf9d0b 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -326,6 +326,7 @@ static const struct gl_functions gl_functions[] = {
     // uniform buffer object extensions, requires OpenGL 3.1.
     {
         .ver_core = 310,
+        .ver_es_core = 300,
         .extension = "GL_ARB_uniform_buffer_object",
         .functions = (const struct gl_function[]) {
             DEF_FN(GetUniformBlockIndex),
diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c
index 04131078e3..fcb9aa3e01 100644
--- a/video/out/opengl/nnedi3.c
+++ b/video/out/opengl/nnedi3.c
@@ -108,7 +108,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
         snprintf(buf, sizeof(buf), "vec4 weights[%d];",
                  neurons * (sample_count * 2 + 1));
         gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0);
-        if (gl->glsl_version < 140)
+        if (!gl->es && gl->glsl_version < 140)
             gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object");
     } else if (conf->upload == NNEDI3_UPLOAD_SHADER) {
         // Somehow necessary for hard coding approach.
@@ -139,12 +139,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
 
     for (int y = 0; y < height; y++)
         for (int x = 0; x < width; x += 4) {
-            GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d),"
-                                      "GET(%d, %d), GET(%d, %d));\n",
+            GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0),"
+                                      "GET(%d.0, %d.0), GET(%d.0, %d.0));\n",
                    (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y);
         }
 
-    GLSLHF("float sum = 0, sumsq = 0;"
+    GLSLHF("float sum = 0.0, sumsq = 0.0;"
            "for (int i = 0; i < %d; i++) {"
                "sum += dot(samples[i], vec4(1.0));"
                "sumsq += dot(samples[i], samples[i]);"
@@ -152,11 +152,11 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
 
     GLSLHF("float mstd0 = sum / %d.0;\n"
            "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n"
-           "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
+           "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
            "mstd1 *= mstd2;\n",
            width * height, width * height, FLT_EPSILON);
 
-    GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n");
+    GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n");
 
     if (conf->upload == NNEDI3_UPLOAD_SHADER) {
         GLSLH(#define T(x) intBitsToFloat(x))
@@ -166,7 +166,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
                "sum1 = exp(sum1 * mstd2 + T(w0));"
                "sum2 = sum2 * mstd2 + T(w1);"
                "wsum += sum1;"
-               "vsum += sum1*(sum2/(1+abs(sum2)));\n");
+               "vsum += sum1*(sum2/(1.0+abs(sum2)));\n");
 
         for (int n = 0; n < neurons; n++) {
             const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n;
@@ -191,7 +191,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
         GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons);
 
         for (int s = 0; s < 2; s++) {
-            GLSLHF("sum%d = 0;\n"
+            GLSLHF("sum%d = 0.0;\n"
                    "for (int i = 0; i < %d; i++) {"
                        "sum%d += dot(samples[i], weights[idx++]);"
                    "}\n",
@@ -201,12 +201,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
         GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]);
               sum2 = sum2 * mstd2 + weights[idx++][1];
               wsum += sum1;
-              vsum += sum1*(sum2/(1+abs(sum2)));)
+              vsum += sum1*(sum2/(1.0+abs(sum2)));)
 
         GLSLHF("}\n");
     }
 
-    GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);)
+    GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);)
 
     GLSLHF("}\n"); // nnedi3
 
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 287c240691..f07fbb104b 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -2483,14 +2483,18 @@ static void check_gl_features(struct gl_video *p)
     if (p->opts.prescale == 2) {
         if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) {
             // Check features for uniform buffer objects.
-            if (!p->gl->BindBufferBase || !p->gl->GetUniformBlockIndex) {
-                MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.1 required).\n");
+            if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) {
+                MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+                        gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1");
                 p->opts.prescale = 0;
             }
         } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) {
             // Check features for hard coding approach.
-            if (p->gl->glsl_version < 330) {
-                MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.3 required).\n");
+            if ((!gl->es && gl->glsl_version < 330) ||
+                (gl->es && gl->glsl_version < 300))
+            {
+                MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+                        gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3");
                 p->opts.prescale = 0;
             }
         }
-- 
cgit v1.2.3