summary refs log tree commit diff stats
path: root/video
diff options
context:
space:
mode:
author Bin Jin <bjin1990@gmail.com> 2015-12-02 00:28:26 +0000
committer wm4 <wm4@nowhere> 2015-12-02 12:32:02 +0100
commit 42a0f4d87b601e1a3c797c31043d3ca777881974 (patch)
tree 8396475463a5f3dfbcf8a417544bcd43e14a7ec0 /video
parent 69cc002c9294a2982dc3753a9602c10d34c1020b (diff)
download mpv-42a0f4d87b601e1a3c797c31043d3ca777881974.tar.bz2
mpv-42a0f4d87b601e1a3c797c31043d3ca777881974.tar.xz
vo_opengl: enable NNEDI3 prescaler on OpenGL ES 3.0
It turns out that both UBOs and intBitsToFloat() are supported in OpenGL ES 3.0 [1][2]; enable them so that the NNEDI3 prescaler can be used on a wider range of backends. Also fix some implicit int-to-float conversions so that the shader actually compiles on GLES. Tested on Linux desktop (nvidia 358.16) with the "es" sub-option. [1]: https://www.khronos.org/opengles/sdk/docs/man3/html/glGetUniformBlockIndex.xhtml [2]: https://www.khronos.org/opengles/sdk/docs/manglsl/docbook4/xhtml/intBitsToFloat.xml
Diffstat (limited to 'video')
-rw-r--r-- video/out/opengl/common.c 1
-rw-r--r-- video/out/opengl/nnedi3.c 20
-rw-r--r-- video/out/opengl/video.c 12
3 files changed, 19 insertions, 14 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index ae4384ec30..a93ecf9d0b 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -326,6 +326,7 @@ static const struct gl_functions gl_functions[] = {
// uniform buffer object extensions, requires OpenGL 3.1.
{
.ver_core = 310,
+ .ver_es_core = 300,
.extension = "GL_ARB_uniform_buffer_object",
.functions = (const struct gl_function[]) {
DEF_FN(GetUniformBlockIndex),
diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c
index 04131078e3..fcb9aa3e01 100644
--- a/video/out/opengl/nnedi3.c
+++ b/video/out/opengl/nnedi3.c
@@ -108,7 +108,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
snprintf(buf, sizeof(buf), "vec4 weights[%d];",
neurons * (sample_count * 2 + 1));
gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0);
- if (gl->glsl_version < 140)
+ if (!gl->es && gl->glsl_version < 140)
gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object");
} else if (conf->upload == NNEDI3_UPLOAD_SHADER) {
// Somehow necessary for hard coding approach.
@@ -139,12 +139,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 4) {
- GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d),"
- "GET(%d, %d), GET(%d, %d));\n",
+ GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0),"
+ "GET(%d.0, %d.0), GET(%d.0, %d.0));\n",
(y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y);
}
- GLSLHF("float sum = 0, sumsq = 0;"
+ GLSLHF("float sum = 0.0, sumsq = 0.0;"
"for (int i = 0; i < %d; i++) {"
"sum += dot(samples[i], vec4(1.0));"
"sumsq += dot(samples[i], samples[i]);"
@@ -152,11 +152,11 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLHF("float mstd0 = sum / %d.0;\n"
"float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n"
- "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
+ "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
"mstd1 *= mstd2;\n",
width * height, width * height, FLT_EPSILON);
- GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n");
+ GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n");
if (conf->upload == NNEDI3_UPLOAD_SHADER) {
GLSLH(#define T(x) intBitsToFloat(x))
@@ -166,7 +166,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
"sum1 = exp(sum1 * mstd2 + T(w0));"
"sum2 = sum2 * mstd2 + T(w1);"
"wsum += sum1;"
- "vsum += sum1*(sum2/(1+abs(sum2)));\n");
+ "vsum += sum1*(sum2/(1.0+abs(sum2)));\n");
for (int n = 0; n < neurons; n++) {
const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n;
@@ -191,7 +191,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons);
for (int s = 0; s < 2; s++) {
- GLSLHF("sum%d = 0;\n"
+ GLSLHF("sum%d = 0.0;\n"
"for (int i = 0; i < %d; i++) {"
"sum%d += dot(samples[i], weights[idx++]);"
"}\n",
@@ -201,12 +201,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]);
sum2 = sum2 * mstd2 + weights[idx++][1];
wsum += sum1;
- vsum += sum1*(sum2/(1+abs(sum2)));)
+ vsum += sum1*(sum2/(1.0+abs(sum2)));)
GLSLHF("}\n");
}
- GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);)
+ GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);)
GLSLHF("}\n"); // nnedi3
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 287c240691..f07fbb104b 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -2483,14 +2483,18 @@ static void check_gl_features(struct gl_video *p)
if (p->opts.prescale == 2) {
if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) {
// Check features for uniform buffer objects.
- if (!p->gl->BindBufferBase || !p->gl->GetUniformBlockIndex) {
- MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.1 required).\n");
+ if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) {
+ MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+ gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1");
p->opts.prescale = 0;
}
} else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) {
// Check features for hard coding approach.
- if (p->gl->glsl_version < 330) {
- MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.3 required).\n");
+ if ((!gl->es && gl->glsl_version < 330) ||
+ (gl->es && gl->glsl_version < 300))
+ {
+ MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+ gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3");
p->opts.prescale = 0;
}
}