From 42a0f4d87b601e1a3c797c31043d3ca777881974 Mon Sep 17 00:00:00 2001 From: Bin Jin Date: Wed, 2 Dec 2015 00:28:26 +0000 Subject: vo_opengl: enable NNEDI3 prescaler on OpenGL ES 3.0 It turns out that both UBO and intBitsToFloat() are supported in OpenGL ES 3.0[1][2]; enable them so that the NNEDI3 prescaler can be used in a wider range of backends. Also fixes some implicit int-to-float conversions so that the shader actually compiles on GLES. Tested on Linux desktop (nvidia 358.16) with "es" sub-option. [1]: https://www.khronos.org/opengles/sdk/docs/man3/html/glGetUniformBlockIndex.xhtml [2]: https://www.khronos.org/opengles/sdk/docs/manglsl/docbook4/xhtml/intBitsToFloat.xml --- video/out/opengl/common.c | 1 + video/out/opengl/nnedi3.c | 20 ++++++++++---------- video/out/opengl/video.c | 12 ++++++++---- 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'video') diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index ae4384ec30..a93ecf9d0b 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -326,6 +326,7 @@ static const struct gl_functions gl_functions[] = { // uniform buffer object extensions, requires OpenGL 3.1. { .ver_core = 310, + .ver_es_core = 300, .extension = "GL_ARB_uniform_buffer_object", .functions = (const struct gl_function[]) { DEF_FN(GetUniformBlockIndex), diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c index 04131078e3..fcb9aa3e01 100644 --- a/video/out/opengl/nnedi3.c +++ b/video/out/opengl/nnedi3.c @@ -108,7 +108,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, snprintf(buf, sizeof(buf), "vec4 weights[%d];", neurons * (sample_count * 2 + 1)); gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0); - if (gl->glsl_version < 140) + if (!gl->es && gl->glsl_version < 140) gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object"); } else if (conf->upload == NNEDI3_UPLOAD_SHADER) { // Somehow necessary for hard coding approach. 
@@ -139,12 +139,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, for (int y = 0; y < height; y++) for (int x = 0; x < width; x += 4) { - GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d)," - "GET(%d, %d), GET(%d, %d));\n", + GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0)," + "GET(%d.0, %d.0), GET(%d.0, %d.0));\n", (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y); } - GLSLHF("float sum = 0, sumsq = 0;" + GLSLHF("float sum = 0.0, sumsq = 0.0;" "for (int i = 0; i < %d; i++) {" "sum += dot(samples[i], vec4(1.0));" "sumsq += dot(samples[i], samples[i]);" @@ -152,11 +152,11 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, GLSLHF("float mstd0 = sum / %d.0;\n" "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n" - "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n" + "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n" "mstd1 *= mstd2;\n", width * height, width * height, FLT_EPSILON); - GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n"); + GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n"); if (conf->upload == NNEDI3_UPLOAD_SHADER) { GLSLH(#define T(x) intBitsToFloat(x)) @@ -166,7 +166,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, "sum1 = exp(sum1 * mstd2 + T(w0));" "sum2 = sum2 * mstd2 + T(w1);" "wsum += sum1;" - "vsum += sum1*(sum2/(1+abs(sum2)));\n"); + "vsum += sum1*(sum2/(1.0+abs(sum2)));\n"); for (int n = 0; n < neurons; n++) { const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n; @@ -191,7 +191,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons); for (int s = 0; s < 2; s++) { - GLSLHF("sum%d = 0;\n" + GLSLHF("sum%d = 0.0;\n" "for (int i = 0; i < %d; i++) {" "sum%d += dot(samples[i], weights[idx++]);" "}\n", @@ -201,12 +201,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, GLSLH(sum1 = exp(sum1 * 
mstd2 + weights[idx][0]); sum2 = sum2 * mstd2 + weights[idx++][1]; wsum += sum1; - vsum += sum1*(sum2/(1+abs(sum2)));) + vsum += sum1*(sum2/(1.0+abs(sum2)));) GLSLHF("}\n"); } - GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);) + GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);) GLSLHF("}\n"); // nnedi3 diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 287c240691..f07fbb104b 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -2483,14 +2483,18 @@ static void check_gl_features(struct gl_video *p) if (p->opts.prescale == 2) { if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) { // Check features for uniform buffer objects. - if (!p->gl->BindBufferBase || !p->gl->GetUniformBlockIndex) { - MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.1 required).\n"); + if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) { + MP_WARN(p, "Disabling NNEDI3 (%s required).\n", + gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1"); p->opts.prescale = 0; } } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) { // Check features for hard coding approach. - if (p->gl->glsl_version < 330) { - MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.3 required).\n"); + if ((!gl->es && gl->glsl_version < 330) || + (gl->es && gl->glsl_version < 300)) + { + MP_WARN(p, "Disabling NNEDI3 (%s required).\n", + gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3"); p->opts.prescale = 0; } } -- cgit v1.2.3