summary refs log tree commit diff stats
path: root/video/out/opengl/nnedi3.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/opengl/nnedi3.c')
-rw-r--r-- video/out/opengl/nnedi3.c 59
1 files changed, 49 insertions, 10 deletions
diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c
index 04131078e3..bb200b0f7a 100644
--- a/video/out/opengl/nnedi3.c
+++ b/video/out/opengl/nnedi3.c
@@ -18,10 +18,15 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
+ *
+ * The shader portions may have been derived from existing LGPLv3 shaders
+ * (see below), possibly making this file effectively LGPLv3.
*/
#include "nnedi3.h"
+#if HAVE_NNEDI
+
#include <assert.h>
#include <stdint.h>
#include <float.h>
@@ -30,6 +35,22 @@
#include "video.h"
+/*
+ * NNEDI3, an intra-field deinterlacer
+ *
+ * The original filter was authored by Kevin Stone (aka. tritical) and is
+ * licensed under GPL2 terms:
+ * http://bengal.missouri.edu/~kes25c/
+ *
+ * An LGPLv3-licensed OpenCL kernel was created by SEt:
+ * http://forum.doom9.org/showthread.php?t=169766
+ *
+ * An HLSL port further modified by madshi, Shiandow and Zach Saw could be
+ * found at (also LGPLv3-licensed):
+ * https://github.com/zachsaw/MPDN_Extensions
+ *
+ */
+
#define GLSL(x) gl_sc_add(sc, #x "\n");
#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
@@ -108,7 +129,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
snprintf(buf, sizeof(buf), "vec4 weights[%d];",
neurons * (sample_count * 2 + 1));
gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0);
- if (gl->glsl_version < 140)
+ if (!gl->es && gl->glsl_version < 140)
gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object");
} else if (conf->upload == NNEDI3_UPLOAD_SHADER) {
// Somehow necessary for hard coding approach.
@@ -139,12 +160,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 4) {
- GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d),"
- "GET(%d, %d), GET(%d, %d));\n",
+ GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0),"
+ "GET(%d.0, %d.0), GET(%d.0, %d.0));\n",
(y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y);
}
- GLSLHF("float sum = 0, sumsq = 0;"
+ GLSLHF("float sum = 0.0, sumsq = 0.0;"
"for (int i = 0; i < %d; i++) {"
"sum += dot(samples[i], vec4(1.0));"
"sumsq += dot(samples[i], samples[i]);"
@@ -152,11 +173,11 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLHF("float mstd0 = sum / %d.0;\n"
"float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n"
- "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
+ "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
"mstd1 *= mstd2;\n",
width * height, width * height, FLT_EPSILON);
- GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n");
+ GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n");
if (conf->upload == NNEDI3_UPLOAD_SHADER) {
GLSLH(#define T(x) intBitsToFloat(x))
@@ -166,7 +187,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
"sum1 = exp(sum1 * mstd2 + T(w0));"
"sum2 = sum2 * mstd2 + T(w1);"
"wsum += sum1;"
- "vsum += sum1*(sum2/(1+abs(sum2)));\n");
+ "vsum += sum1*(sum2/(1.0+abs(sum2)));\n");
for (int n = 0; n < neurons; n++) {
const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n;
@@ -191,7 +212,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons);
for (int s = 0; s < 2; s++) {
- GLSLHF("sum%d = 0;\n"
+ GLSLHF("sum%d = 0.0;\n"
"for (int i = 0; i < %d; i++) {"
"sum%d += dot(samples[i], weights[idx++]);"
"}\n",
@@ -201,12 +222,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]);
sum2 = sum2 * mstd2 + weights[idx++][1];
wsum += sum1;
- vsum += sum1*(sum2/(1+abs(sum2)));)
+ vsum += sum1*(sum2/(1.0+abs(sum2)));)
GLSLHF("}\n");
}
- GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);)
+ GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);)
GLSLHF("}\n"); // nnedi3
@@ -217,3 +238,21 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
i, tex_num, tex_num, tex_num, i, tex_mul);
}
}
+
+#else
+
+const struct m_sub_options nnedi3_conf = {0}; // zero-initialized placeholder for the !HAVE_NNEDI build
+
+// Stub for builds without HAVE_NNEDI: always returns NULL and never writes *size.
+const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size)
+{
+ return NULL; // NOTE(review): callers presumably must handle NULL before reading *size - confirm
+}
+
+void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
+ int step, float tex_mul, const struct nnedi3_opts *conf,
+ struct gl_transform *transform)
+{ // Stub for builds without HAVE_NNEDI: intentionally empty, all arguments are ignored.
+}
+
+#endif