From 61bc96518afcfabfa07d6724ea517db5e9aba165 Mon Sep 17 00:00:00 2001 From: Bin Jin Date: Fri, 10 Jun 2016 12:22:51 +0000 Subject: vo_opengl: remove nnedi3 prescaler --- video/out/opengl/nnedi3.c | 248 ------------------------------------ video/out/opengl/nnedi3.h | 45 ------- video/out/opengl/nnedi3_weights.bin | Bin 161280 -> 0 bytes video/out/opengl/video.c | 72 ----------- video/out/opengl/video.h | 2 - 5 files changed, 367 deletions(-) delete mode 100644 video/out/opengl/nnedi3.c delete mode 100644 video/out/opengl/nnedi3.h delete mode 100644 video/out/opengl/nnedi3_weights.bin (limited to 'video/out') diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c deleted file mode 100644 index 74eb083786..0000000000 --- a/video/out/opengl/nnedi3.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - * - * The shader portions may have been derived from existing LGPLv3 shaders - * (see below), possibly making this file effectively LGPLv3. - */ - -#include "nnedi3.h" - -#if HAVE_NNEDI - -#include -#include -#include - -#include - -#include "video.h" - -/* - * NNEDI3, an intra-field deinterlacer - * - * The original filter was authored by Kevin Stone (aka. tritical) and is - * licensed under GPL2 terms: - * http://bengal.missouri.edu/~kes25c/ - * - * A LGPLv3 licensed OpenCL kernel was created by SEt: - * http://forum.doom9.org/showthread.php?t=169766 - * - * A HLSL port further modified by madshi, Shiandow and Zach Saw could be - * found at (also LGPLv3 licensed): - * https://github.com/zachsaw/MPDN_Extensions - * - */ - -#define GLSL(x) gl_sc_add(sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) -#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); -#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) - -const struct nnedi3_opts nnedi3_opts_def = { - .neurons = 1, - .window = 0, - .upload = NNEDI3_UPLOAD_UBO, -}; - -#define OPT_BASE_STRUCT struct nnedi3_opts -const struct m_sub_options nnedi3_conf = { - .opts = (const m_option_t[]) { - OPT_CHOICE("neurons", neurons, 0, - ({"16", 0}, - {"32", 1}, - {"64", 2}, - {"128", 3})), - OPT_CHOICE("window", window, 0, - ({"8x4", 0}, - {"8x6", 1})), - OPT_CHOICE("upload", upload, 0, - ({"ubo", NNEDI3_UPLOAD_UBO}, - {"shader", NNEDI3_UPLOAD_SHADER})), - {0} - }, - .size = sizeof(struct nnedi3_opts), - .defaults = &nnedi3_opts_def, -}; - -const static char nnedi3_weights[40320 * 4 + 1] = -#include "video/out/opengl/nnedi3_weights.inc" -; - -const int nnedi3_weight_offsets[9] = - {0, 1088, 3264, 7616, 16320, 17920, 21120, 27520, 40320}; - -const int nnedi3_neurons[4] = {16, 32, 64, 128}; -const int nnedi3_window_width[2] = {8, 8}; -const int nnedi3_window_height[2] = {4, 6}; - -const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size) -{ - int idx = conf->window * 4 + conf->neurons; - const int offset = nnedi3_weight_offsets[idx]; - *size = (nnedi3_weight_offsets[idx + 1] - offset) * 4; - return (const float*)(nnedi3_weights + offset * 4); -} - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step, - const struct nnedi3_opts *conf, - struct gl_transform *transform) -{ - assert(0 <= step && step < 2); - - if (!conf) - conf = &nnedi3_opts_def; - - const int neurons = nnedi3_neurons[conf->neurons]; - const int width = nnedi3_window_width[conf->window]; - const int height = nnedi3_window_height[conf->window]; - - const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons]; - const uint32_t *weights = (const int*)(nnedi3_weights + offset * 4); - - GLSLF("// nnedi3 (step %d, neurons %d, window %dx%d, mode %d)\n", - step, neurons, width, height, conf->upload); - - // This is required since each row will be encoded into vec4s - assert(width % 4 == 0); - const int sample_count = width * height / 4; - - if (conf->upload == NNEDI3_UPLOAD_UBO) { - char buf[32]; - snprintf(buf, sizeof(buf), "vec4 weights[%d];", - neurons * (sample_count * 2 + 1)); - gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0); - if (!gl->es && gl->glsl_version < 140) - gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object"); - } else if (conf->upload == NNEDI3_UPLOAD_SHADER) { - // Somehow necessary for hard coding approach. - GLSLH(#pragma optionNV(fastprecision on)) - } - - GLSLHF("float nnedi3() {\n"); - - if (step == 0) { - *transform = (struct gl_transform){{{1.0,0.0}, {0.0,2.0}}, {0.0,-0.5}}; - - GLSLH(if ((transpose(HOOKED_rot) * fract(HOOKED_pos * HOOKED_size)).y < 0.5) - return HOOKED_texOff(vec2(0, 0.25)).x;) - GLSLHF("#define GET(i, j) " - "HOOKED_texOff(vec2((i)-(%f),(j)-(%f)+0.25)).x\n", - width / 2.0 - 1, (height - 1) / 2.0); - } else { - *transform = (struct gl_transform){{{2.0,0.0}, {0.0,1.0}}, {-0.5,0.0}}; - - GLSLH(if (fract(HOOKED_pos.x * HOOKED_size.x) < 0.5) - return HOOKED_texOff(vec2(0.25, 0)).x;) - GLSLHF("#define GET(i, j) " - "HOOKED_texOff(vec2((j)-(%f)+0.25,(i)-(%f))).x\n", - (height - 1) / 2.0, width / 2.0 - 1); - } - - GLSLHF("vec4 samples[%d];\n", sample_count); - - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x += 4) { - GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0)," - "GET(%d.0, %d.0), GET(%d.0, %d.0));\n", - (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y); - } - - GLSLHF("float sum = 0.0, sumsq = 0.0;" - "for (int i = 0; i < %d; i++) {" - "sum += dot(samples[i], vec4(1.0));" - "sumsq += dot(samples[i], samples[i]);" - "}\n", sample_count); - - GLSLHF("float mstd0 = sum / %d.0;\n" - "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n" - "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n" - "mstd1 *= mstd2;\n", - width * height, width * height, FLT_EPSILON); - - GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n"); - - if (conf->upload == NNEDI3_UPLOAD_SHADER) { - GLSLH(#define T(x) intBitsToFloat(x)) - GLSLH(#define W(i,w0,w1,w2,w3) dot(samples[i],vec4(T(w0),T(w1),T(w2),T(w3)))) - - GLSLHF("#define WS(w0,w1) " - "sum1 = exp(sum1 * mstd2 + T(w0));" - "sum2 = sum2 * mstd2 + T(w1);" - "wsum += sum1;" - "vsum += sum1*(sum2/(1.0+abs(sum2)));\n"); - - for (int n = 0; n < neurons; n++) { - const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n; - for (int s = 0; s < 2; s++) { - GLSLHF("sum%d", s + 1); - for (int i = 0; i < sample_count; i++) { - GLSLHF("%cW(%d,%d,%d,%d,%d)", i == 0 ? '=' : '+', i, - (int)av_le2ne32(weights_ptr[0]), - (int)av_le2ne32(weights_ptr[1]), - (int)av_le2ne32(weights_ptr[2]), - (int)av_le2ne32(weights_ptr[3])); - weights_ptr += 4; - } - GLSLHF(";"); - } - GLSLHF("WS(%d,%d);\n", (int)av_le2ne32(weights_ptr[0]), - (int)av_le2ne32(weights_ptr[1])); - } - } else if (conf->upload == NNEDI3_UPLOAD_UBO) { - GLSLH(int idx = 0;) - - GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons); - - for (int s = 0; s < 2; s++) { - GLSLHF("sum%d = 0.0;\n" - "for (int i = 0; i < %d; i++) {" - "sum%d += dot(samples[i], weights[idx++]);" - "}\n", - s + 1, sample_count, s + 1); - } - - GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]); - sum2 = sum2 * mstd2 + weights[idx++][1]; - wsum += sum1; - vsum += sum1*(sum2/(1.0+abs(sum2)));) - - GLSLHF("}\n"); - } - - GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);) - - GLSLHF("}\n"); // nnedi3 - - GLSL(color.x = nnedi3();) -} - -#else - -const struct m_sub_options nnedi3_conf = {0}; - - -const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size) -{ - return NULL; -} - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step, - const struct nnedi3_opts *conf, - struct gl_transform *transform) -{ -} - -#endif diff --git a/video/out/opengl/nnedi3.h b/video/out/opengl/nnedi3.h deleted file mode 100644 index 8cd1a65815..0000000000 --- a/video/out/opengl/nnedi3.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#ifndef MP_GL_NNEDI3_H -#define MP_GL_NNEDI3_H - -#include "config.h" -#include "common.h" -#include "utils.h" - -#define HAVE_NNEDI HAVE_GPL3 - -#define NNEDI3_UPLOAD_UBO 0 -#define NNEDI3_UPLOAD_SHADER 1 - -struct nnedi3_opts { - int neurons; - int window; - int upload; -}; - -extern const struct nnedi3_opts nnedi3_opts_def; -extern const struct m_sub_options nnedi3_conf; - -const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size); - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step, - const struct nnedi3_opts *conf, - struct gl_transform *transform); - -#endif diff --git a/video/out/opengl/nnedi3_weights.bin b/video/out/opengl/nnedi3_weights.bin deleted file mode 100644 index e1659d848c..0000000000 Binary files a/video/out/opengl/nnedi3_weights.bin and /dev/null differ diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 6282d296cc..48d777b207 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -38,7 +38,6 @@ #include "osd.h" #include "stream/stream.h" #include "superxbr.h" -#include "nnedi3.h" #include "video_shaders.h" #include "user_shaders.h" #include "video/out/filter_kernels.h" @@ -196,8 +195,6 @@ struct gl_video { GLuint dither_texture; int dither_size; - GLuint nnedi3_weights_buffer; - struct gl_timer *upload_timer; struct gl_timer *render_timer; struct gl_timer *present_timer; @@ -455,16 +452,12 @@ const struct m_sub_options gl_video_conf = { OPT_CHOICE("prescale-luma", prescale_luma, 0, ({"none", PRESCALE_NONE}, {"superxbr", PRESCALE_SUPERXBR} -#if HAVE_NNEDI - , {"nnedi3", PRESCALE_NNEDI3} -#endif )), OPT_INTRANGE("prescale-passes", prescale_passes, 0, 1, MAX_PRESCALE_PASSES), OPT_FLOATRANGE("prescale-downscaling-threshold", prescale_downscaling_threshold, 0, 0.0, 32.0), OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0), - OPT_SUBSTRUCT("nnedi3", nnedi3_opts, nnedi3_conf, 0), OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), OPT_REMOVED("approx-gamma", "this is always enabled now"), @@ -594,9 +587,6 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; - gl->DeleteBuffers(1, &p->nnedi3_weights_buffer); - p->nnedi3_weights_buffer = 0; - for (int n = 0; n < 4; n++) { fbotex_uninit(&p->merge_fbo[n]); fbotex_uninit(&p->scale_fbo[n]); @@ -1520,27 +1510,6 @@ static int get_prescale_passes(struct gl_video *p) return passes; } -// Upload the NNEDI3 UBO weights only if needed -static void upload_nnedi3_weights(struct gl_video *p) -{ - GL *gl = p->gl; - - if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO && - !p->nnedi3_weights_buffer) - { - gl->GenBuffers(1, &p->nnedi3_weights_buffer); - gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer); - - int size; - const float *weights = get_nnedi3_weights(p->opts.nnedi3_opts, &size); - - MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", size); - - // We don't know the endianness of GPU, just assume it's LE - gl->BufferData(GL_UNIFORM_BUFFER, size, weights, GL_STATIC_DRAW); - } -} - // Returns true if two img_texs are semantically equivalent (same metadata) static bool img_tex_equiv(struct img_tex a, struct img_tex b) { @@ -1594,14 +1563,6 @@ static void superxbr_hook(struct gl_video *p, struct img_tex tex, pass_superxbr(p->sc, step, p->opts.superxbr_opts, trans); } -static void nnedi3_hook(struct gl_video *p, struct img_tex tex, - struct gl_transform *trans, void *priv) -{ - int step = (uintptr_t)priv; - upload_nnedi3_weights(p); - pass_nnedi3(p->gl, p->sc, step, p->opts.nnedi3_opts, trans); -} - static void unsharp_hook(struct gl_video *p, struct img_tex tex, struct gl_transform *trans, void *priv) { @@ -1849,19 +1810,6 @@ static void gl_video_setup_hooks(struct gl_video *p) } } - if (p->opts.prescale_luma == PRESCALE_NNEDI3) { - for (int i = 0; i < prescale_passes; i++) { - for (int step = 0; step < 2; step++) { - pass_add_hook(p, (struct tex_hook) { - .hook_tex = "LUMA", - .bind_tex = {"HOOKED"}, - .hook = nnedi3_hook, - .priv = (void *)(uintptr_t)step, - }); - } - } - } - if (p->opts.unsharp != 0.0) { pass_add_hook(p, (struct tex_hook) { .hook_tex = "MAIN", @@ -3283,26 +3231,6 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } - - if (p->opts.prescale_luma == PRESCALE_NNEDI3) { - if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) { - // Check features for uniform buffer objects. - if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) { - MP_WARN(p, "Disabling NNEDI3 (%s required).\n", - gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1"); - p->opts.prescale_luma = PRESCALE_NONE; - } - } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) { - // Check features for hard coding approach. - if ((!gl->es && gl->glsl_version < 330) || - (gl->es && gl->glsl_version < 300)) - { - MP_WARN(p, "Disabling NNEDI3 (%s required).\n", - gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3"); - p->opts.prescale_luma = PRESCALE_NONE; - } - } - } } static void init_gl(struct gl_video *p) diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index 9e44cf8b06..09d05ce449 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -95,7 +95,6 @@ enum blend_subs_mode { enum prescalers { PRESCALE_NONE = 0, PRESCALE_SUPERXBR, - PRESCALE_NNEDI3, }; enum tone_mapping { @@ -147,7 +146,6 @@ struct gl_video_opts { int prescale_passes; float prescale_downscaling_threshold; struct superxbr_opts *superxbr_opts; - struct nnedi3_opts *nnedi3_opts; struct mp_icc_opts *icc_opts; }; -- cgit v1.2.3