vo_opengl: refactor into vo_gpu

This is done in several steps:

1. refactor MPGLContext -> struct ra_ctx
2. move GL-specific stuff in vo_opengl into opengl/context.c
3. generalize context creation to support other APIs, and add --gpu-api
4. rename all of the --opengl- options that are no longer opengl-specific
5. move all of the stuff from opengl/* that isn't GL-specific into gpu/ (note: opengl/gl_utils.h became opengl/utils.h)
6. rename vo_opengl to vo_gpu
7. to handle window screenshots, the short-term approach was to just add it to ra_swchain_fns. Long term (and for vulkan) this has to be moved to ra itself (and vo_gpu altered to compensate), but this was a stop-gap measure to prevent this commit from getting too big
8. move ra->fns->flush to ra_gl_ctx instead
9. some other minor changes that I've probably already forgotten

Note: This is one half of a major refactor, the other half of which is provided by rossy's following commit. This commit enables support for all linux platforms, while his version enables support for all non-linux platforms.

Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the --opengl- options like --opengl-early-flush, --opengl-finish etc. Should be a strict superset of the old functionality.

Disclaimer: Since I have no way of compiling mpv on all platforms, some of these ports were done blindly. Specifically, the blind ports included context_mali_fbdev.c and context_rpi.c. Since they're both based on egl_helpers, the port should have gone smoothly without any major changes required. But if somebody complains about a compile error on those platforms (assuming anybody actually uses them), you know where to complain.
author: Niklas Haas <git@haasn.xyz> 2017-09-14 08:04:55 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-09-21 15:00:55 +0200
commit: 65979986a923a8f08019b257c3fe72cd5e8ecf68 (patch)
tree: b8f4b8c17d583594aef0ca509064f8b2ff7128d4 /video/out/opengl/video_shaders.c
parent: 20f958c9775652c3213588c2a0824f5353276adc (diff)
download: mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.bz2
mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.xz
1 files changed, 0 insertions, 872 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
deleted file mode 100644
index 60c5ce82ac..0000000000
--- a/video/out/opengl/video_shaders.c
+++ /dev/null
@@ -1,872 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <math.h>
-
-#include "video_shaders.h"
-#include "video.h"
-
-#define GLSL(x) gl_sc_add(sc, #x "\n");
-#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
-#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
-#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
-
-// Set up shared/commonly used variables and macros
-void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
-{
-    GLSLF("#undef tex\n");
-    GLSLF("#undef texmap\n");
-    GLSLF("#define tex texture%d\n", tex_num);
-    GLSLF("#define texmap texmap%d\n", tex_num);
-    GLSLF("vec2 pos = texcoord%d;\n", tex_num);
-    GLSLF("vec2 size = texture_size%d;\n", tex_num);
-    GLSLF("vec2 pt = pixel_size%d;\n", tex_num);
-}
-
-static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
-                                              struct scaler *scaler)
-{
-    gl_sc_uniform_texture(sc, "lut", scaler->lut);
-    GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
-
-    int N = scaler->kernel->size;
-    int width = (N + 3) / 4; // round up
-
-    GLSLF("float weights[%d];\n", N);
-    for (int i = 0; i < N; i++) {
-        if (i % 4 == 0)
-            GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
-        GLSLF("weights[%d] = c[%d];\n", i, i % 4);
-    }
-}
-
-// Handle a single pass (either vertical or horizontal). The direction is given
-// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
-// used instead (samples from texture0 through textureN)
-void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
-                               int d_x, int d_y)
-{
-    int N = scaler->kernel->size;
-    bool use_ar = scaler->conf.antiring > 0;
-    bool planar = d_x == 0 && d_y == 0;
-    GLSL(color = vec4(0.0);)
-    GLSLF("{\n");
-    if (!planar) {
-        GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y);
-        GLSL(pt *= dir;)
-        GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
-        GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1);
-    }
-    GLSL(vec4 c;)
-    if (use_ar) {
-        GLSL(vec4 hi = vec4(0.0);)
-        GLSL(vec4 lo = vec4(1.0);)
-    }
-    pass_sample_separated_get_weights(sc, scaler);
-    GLSLF("// scaler samples\n");
-    for (int n = 0; n < N; n++) {
-        if (planar) {
-            GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
-        } else {
-            GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n);
-        }
-        GLSLF("color += vec4(weights[%d]) * c;\n", n);
-        if (use_ar && (n == N/2-1 || n == N/2)) {
-            GLSL(lo = min(lo, c);)
-            GLSL(hi = max(hi, c);)
-        }
-    }
-    if (use_ar)
-        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
-              scaler->conf.antiring);
-    GLSLF("}\n");
-}
-
-// Subroutine for computing and adding an individual texel contribution
-// If subtexel < 0 and offset < 0, samples directly.
-// If subtexel >= 0, takes the texel from cN[subtexel]
-// If offset >= 0, takes the texel from inN[rel.y+y+offset][rel.x+x+offset]
-static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
-                         int x, int y, int subtexel, int offset, int components)
-{
-    double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
-    double radius_cutoff = scaler->kernel->radius_cutoff;
-
-    // Since we can't know the subpixel position in advance, assume a
-    // worst case scenario
-    int yy = y > 0 ? y-1 : y;
-    int xx = x > 0 ? x-1 : x;
-    double dmax = sqrt(xx*xx + yy*yy);
-    // Skip samples definitely outside the radius
-    if (dmax >= radius_cutoff)
-        return;
-    GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y);
-    // Check for samples that might be skippable
-    bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
-    if (maybe_skippable)
-        GLSLF("if (d < %f) {\n", radius_cutoff);
-
-    // get the weight for this pixel
-    if (scaler->lut->params.dimensions == 1) {
-        GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n",
-              radius, scaler->lut_size);
-    } else {
-        GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n",
-              radius, scaler->lut_size);
-    }
-    GLSL(wsum += w;)
-
-    if (subtexel < 0 && offset < 0) {
-        GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
-        GLSL(color += vec4(w) * c0;)
-    } else if (subtexel >= 0) {
-        for (int n = 0; n < components; n++)
-            GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel);
-    } else if (offset >= 0) {
-        for (int n = 0; n <components; n++)
-            GLSLF("color[%d] += w * in%d[rel.y+%d][rel.x+%d];\n", n, n,
-                  y + offset, x + offset);
-    } else {
-        // invalid usage
-        abort();
-    }
-
-    if (maybe_skippable)
-        GLSLF("}\n");
-}
-
-void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
-                       int components, int glsl_version)
-{
-    GLSL(color = vec4(0.0);)
-    GLSLF("{\n");
-    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
-    GLSL(vec2 base = pos - fcoord * pt;)
-    GLSLF("float w, d, wsum = 0.0;\n");
-    for (int n = 0; n < components; n++)
-        GLSLF("vec4 c%d;\n", n);
-
-    gl_sc_uniform_texture(sc, "lut", scaler->lut);
-
-    GLSLF("// scaler samples\n");
-    int bound = ceil(scaler->kernel->radius_cutoff);
-    for (int y = 1-bound; y <= bound; y += 2) {
-        for (int x = 1-bound; x <= bound; x += 2) {
-            // First we figure out whether it's more efficient to use direct
-            // sampling or gathering. The problem is that gathering 4 texels
-            // only to discard some of them is very wasteful, so only do it if
-            // we suspect it will be a win rather than a loss. This is the case
-            // exactly when all four texels are within bounds
-            bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
-
-            // textureGather is only supported in GLSL 400+
-            if (glsl_version < 400)
-                use_gather = false;
-
-            if (use_gather) {
-                // Gather the four surrounding texels simultaneously
-                for (int n = 0; n < components; n++) {
-                    GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n",
-                          n, x, y, n);
-                }
-
-                // Mix in all of the points with their weights
-                for (int p = 0; p < 4; p++) {
-                    // The four texels are gathered counterclockwise starting
-                    // from the bottom left
-                    static const int xo[4] = {0, 1, 1, 0};
-                    static const int yo[4] = {1, 1, 0, 0};
-                    if (x+xo[p] > bound || y+yo[p] > bound)
-                        continue;
-                    polar_sample(sc, scaler, x+xo[p], y+yo[p], p, -1, components);
-                }
-            } else {
-                // switch to direct sampling instead, for efficiency/compatibility
-                for (int yy = y; yy <= bound && yy <= y+1; yy++) {
-                    for (int xx = x; xx <= bound && xx <= x+1; xx++)
-                        polar_sample(sc, scaler, xx, yy, -1, -1, components);
-                }
-            }
-        }
-    }
-
-    GLSL(color = color / vec4(wsum);)
-    GLSLF("}\n");
-}
-
-// bw/bh: block size
-// iw/ih: input size (pre-calculated to fit all required texels)
-void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
-                        int components, int bw, int bh, int iw, int ih)
-{
-    int bound = ceil(scaler->kernel->radius_cutoff);
-    int offset = bound - 1; // padding top/left
-
-    GLSL(color = vec4(0.0);)
-    GLSLF("{\n");
-    GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);)
-    GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));)
-    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
-    GLSL(vec2 base = pos - pt * fcoord;)
-    GLSL(ivec2 rel = ivec2(round((base - wbase) * size));)
-    GLSLF("float w, d, wsum = 0.0;\n");
-    gl_sc_uniform_texture(sc, "lut", scaler->lut);
-
-    // Load all relevant texels into shmem
-    gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays");
-    for (int c = 0; c < components; c++)
-        GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw);
-
-    GLSL(vec4 c;)
-    GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh);
-    GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw);
-    GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset);
-    for (int c = 0; c < components; c++)
-        GLSLF("in%d[y][x] = c[%d];\n", c, c);
-    GLSLF("}}\n");
-    GLSL(groupMemoryBarrier();)
-    GLSL(barrier();)
-
-    // Dispatch the actual samples
-    GLSLF("// scaler samples\n");
-    for (int y = 1-bound; y <= bound; y++) {
-        for (int x = 1-bound; x <= bound; x++)
-            polar_sample(sc, scaler, x, y, -1, offset, components);
-    }
-
-    GLSL(color = color / vec4(wsum);)
-    GLSLF("}\n");
-}
-
-static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
-{
-    // Explanation of how bicubic scaling with only 4 texel fetches is done:
-    //   http://www.mate.tue.nl/mate/pdfs/10318.pdf
-    //   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
-    // Explanation why this algorithm normally always blurs, even with unit
-    // scaling:
-    //   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
-    //   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
-    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s"
-                " + vec4(1, 0, -0.5, 0.5);\n", t, s);
-    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
-    GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);
-    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
-    GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s);
-}
-
-void pass_sample_bicubic_fast(struct gl_shader_cache *sc)
-{
-    GLSLF("{\n");
-    GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
-    bicubic_calcweights(sc, "parmx", "fcoord.x");
-    bicubic_calcweights(sc, "parmy", "fcoord.y");
-    GLSL(vec4 cdelta;)
-    GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);)
-    GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);)
-    // first y-interpolation
-    GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
-    GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
-    GLSL(vec4 ab = mix(ag, ar, parmy.b);)
-    // second y-interpolation
-    GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
-    GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
-    GLSL(vec4 aa = mix(bg, br, parmy.b);)
-    // x-interpolation
-    GLSL(color = mix(aa, ab, parmx.b);)
-    GLSLF("}\n");
-}
-
-void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
-                                   int w, int h)
-{
-    GLSLF("{\n");
-    GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest
-    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
-    // Determine the mixing coefficient vector
-    gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h});
-    GLSL(vec2 coeff = fcoord * output_size/size;)
-    float threshold = scaler->conf.kernel.params[0];
-    threshold = isnan(threshold) ? 0.0 : threshold;
-    GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold);
-    GLSL(coeff = clamp(coeff, 0.0, 1.0);)
-    // Compute the right blend of colors
-    GLSL(color = texture(tex, pos + pt * (coeff - fcoord));)
-    GLSLF("}\n");
-}
-
-// Common constants for SMPTE ST.2084 (HDR)
-static const float PQ_M1 = 2610./4096 * 1./4,
-                   PQ_M2 = 2523./4096 * 128,
-                   PQ_C1 = 3424./4096,
-                   PQ_C2 = 2413./4096 * 32,
-                   PQ_C3 = 2392./4096 * 32;
-
-// Common constants for ARIB STD-B67 (HLG)
-static const float HLG_A = 0.17883277,
-                   HLG_B = 0.28466892,
-                   HLG_C = 0.55991073;
-
-// Common constants for Panasonic V-Log
-static const float VLOG_B = 0.00873,
-                   VLOG_C = 0.241514,
-                   VLOG_D = 0.598206;
-
-// Common constants for Sony S-Log
-static const float SLOG_A = 0.432699,
-                   SLOG_B = 0.037584,
-                   SLOG_C = 0.616596 + 0.03,
-                   SLOG_P = 3.538813,
-                   SLOG_Q = 0.030001,
-                   SLOG_K2 = 155.0 / 219.0;
-
-// Linearize (expand), given a TRC as input. In essence, this is the ITU-R
-// EOTF, calculated on an idealized (reference) monitor with a white point of
-// MP_REF_WHITE and infinite contrast.
-void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
-{
-    if (trc == MP_CSP_TRC_LINEAR)
-        return;
-
-    GLSLF("// linearize\n");
-
-    // Note that this clamp may technically violate the definition of
-    // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
-    // displayed on the display where such would be possible. That said, the
-    // problem is that not all gamma curves are well-defined on the values
-    // outside this range, so we ignore it and just clip anyway for sanity.
-    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
-
-    switch (trc) {
-    case MP_CSP_TRC_SRGB:
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92),
-                             pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)),
-                             lessThan(vec3(0.04045), color.rgb));)
-        break;
-    case MP_CSP_TRC_BT_1886:
-        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
-        break;
-    case MP_CSP_TRC_GAMMA18:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
-        break;
-    case MP_CSP_TRC_GAMMA22:
-        GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
-        break;
-    case MP_CSP_TRC_GAMMA28:
-        GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
-        break;
-    case MP_CSP_TRC_PRO_PHOTO:
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0),
-                             pow(color.rgb, vec3(1.8)),
-                             lessThan(vec3(0.03125), color.rgb));)
-        break;
-    case MP_CSP_TRC_PQ:
-        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2);
-        GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n"
-              "             / (vec3(%f) - vec3(%f) * color.rgb);\n",
-              PQ_C1, PQ_C2, PQ_C3);
-        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1);
-        // PQ's output range is 0-10000, but we need it to be relative to to
-        // MP_REF_WHITE instead, so rescale
-        GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
-        break;
-    case MP_CSP_TRC_HLG:
-        GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
-              "                exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
-              "                lessThan(vec3(0.5), color.rgb));\n",
-              HLG_C, HLG_A, HLG_B);
-        break;
-    case MP_CSP_TRC_V_LOG:
-        GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
-              "    pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
-              "              - vec3(%f),                                  \n"
-              "    lessThanEqual(vec3(0.181), color.rgb));                \n",
-              VLOG_D, VLOG_C, VLOG_B);
-        break;
-    case MP_CSP_TRC_S_LOG1:
-        GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
-              "            - vec3(%f);\n",
-              SLOG_C, SLOG_A, SLOG_B);
-        break;
-    case MP_CSP_TRC_S_LOG2:
-        GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f),      \n"
-              "    (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
-              "              - vec3(%f)) * vec3(1.0/%f),                   \n"
-              "    lessThanEqual(vec3(%f), color.rgb));                    \n",
-              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q);
-        break;
-    default:
-        abort();
-    }
-
-    // Rescale to prevent clipping on non-float textures
-    GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc));
-}
-
-// Delinearize (compress), given a TRC as output. This corresponds to the
-// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a
-// reference monitor.
-void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
-{
-    if (trc == MP_CSP_TRC_LINEAR)
-        return;
-
-    GLSLF("// delinearize\n");
-    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
-    GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc));
-
-    switch (trc) {
-    case MP_CSP_TRC_SRGB:
-        GLSL(color.rgb = mix(color.rgb * vec3(12.92),
-                             vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))
-                                 - vec3(0.055),
-                             lessThanEqual(vec3(0.0031308), color.rgb));)
-        break;
-    case MP_CSP_TRC_BT_1886:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
-        break;
-    case MP_CSP_TRC_GAMMA18:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
-        break;
-    case MP_CSP_TRC_GAMMA22:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
-        break;
-    case MP_CSP_TRC_GAMMA28:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
-        break;
-    case MP_CSP_TRC_PRO_PHOTO:
-        GLSL(color.rgb = mix(color.rgb * vec3(16.0),
-                             pow(color.rgb, vec3(1.0/1.8)),
-                             lessThanEqual(vec3(0.001953), color.rgb));)
-        break;
-    case MP_CSP_TRC_PQ:
-        GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
-        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1);
-        GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
-              "             / (vec3(1.0) + vec3(%f) * color.rgb);\n",
-              PQ_C1, PQ_C2, PQ_C3);
-        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2);
-        break;
-    case MP_CSP_TRC_HLG:
-        GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
-              "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
-              "                lessThan(vec3(1.0), color.rgb));\n",
-              HLG_A, HLG_B, HLG_C);
-        break;
-    case MP_CSP_TRC_V_LOG:
-        GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),   \n"
-              "                vec3(%f) * log(color.rgb + vec3(%f))   \n"
-              "                    + vec3(%f),                        \n"
-              "                lessThanEqual(vec3(0.01), color.rgb)); \n",
-              VLOG_C / M_LN10, VLOG_B, VLOG_D);
-        break;
-    case MP_CSP_TRC_S_LOG1:
-        GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
-              SLOG_A / M_LN10, SLOG_B, SLOG_C);
-        break;
-    case MP_CSP_TRC_S_LOG2:
-        GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f),                \n"
-              "                vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
-              "                    + vec3(%f),                                 \n"
-              "                lessThanEqual(vec3(0.0), color.rgb));           \n",
-              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C);
-        break;
-    default:
-        abort();
-    }
-}
-
-// Apply the OOTF mapping from a given light type to display-referred light.
-// The extra peak parameter is used to scale the values before and after
-// the OOTF, and can be inferred using mp_trc_nom_peak
-void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak)
-{
-    if (light == MP_CSP_LIGHT_DISPLAY)
-        return;
-
-    GLSLF("// apply ootf\n");
-    GLSLF("color.rgb *= vec3(%f);\n", peak);
-
-    switch (light)
-    {
-    case MP_CSP_LIGHT_SCENE_HLG:
-        // HLG OOTF from BT.2100, assuming a reference display with a
-        // peak of 1000 cd/m² -> gamma = 1.2
-        GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), 0.2));\n",
-              (1000 / MP_REF_WHITE) / pow(12, 1.2));
-        break;
-    case MP_CSP_LIGHT_SCENE_709_1886:
-        // This OOTF is defined by encoding the result as 709 and then decoding
-        // it as 1886; although this is called 709_1886 we actually use the
-        // more precise (by one decimal) values from BT.2020 instead
-        GLSL(color.rgb = mix(color.rgb * vec3(4.5),
-                             vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993),
-                             lessThan(vec3(0.0181), color.rgb));)
-        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
-        break;
-    case MP_CSP_LIGHT_SCENE_1_2:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.2));)
-        break;
-    default:
-        abort();
-    }
-
-    GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
-}
-
-// Inverse of the function pass_ootf, for completeness' sake.
-void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak)
-{
-    if (light == MP_CSP_LIGHT_DISPLAY)
-        return;
-
-    GLSLF("// apply inverse ootf\n");
-    GLSLF("color.rgb *= vec3(%f);\n", peak);
-
-    switch (light)
-    {
-    case MP_CSP_LIGHT_SCENE_HLG:
-        GLSLF("color.rgb *= vec3(1.0/%f);\n", (1000 / MP_REF_WHITE) / pow(12, 1.2));
-        GLSL(color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), 0.2/1.2)));)
-        break;
-    case MP_CSP_LIGHT_SCENE_709_1886:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
-        GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
-                             pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993),
-                                 vec3(1/0.45)),
-                             lessThan(vec3(0.08145), color.rgb));)
-        break;
-    case MP_CSP_LIGHT_SCENE_1_2:
-        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));)
-        break;
-    default:
-        abort();
-    }
-
-    GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
-}
-
-// Tone map from a known peak brightness to the range [0,1]. If ref_peak
-// is 0, we will use peak detection instead
-static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
-                          enum tone_mapping algo, float param, float desat)
-{
-    GLSLF("// HDR tone mapping\n");
-
-    // Desaturate the color using a coefficient dependent on the luminance
-    GLSL(float luma = dot(dst_luma, color.rgb);)
-    if (desat > 0) {
-        GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat);
-        GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);)
-    }
-
-    // To prevent discoloration due to out-of-bounds clipping, we need to make
-    // sure to reduce the value range as far as necessary to keep the entire
-    // signal in range, so tone map based on the brightest component.
-    GLSL(float sig = max(max(color.r, color.g), color.b);)
-    GLSL(float sig_orig = sig;)
-
-    if (!ref_peak) {
-        // For performance, we want to do as few atomic operations on global
-        // memory as possible, so use an atomic in shmem for the work group.
-        // We also want slightly more stable values, so use the group average
-        // instead of the group max
-        GLSLHF("shared uint group_sum = 0;\n");
-        GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE);
-
-        // Have one thread in each work group update the frame maximum
-        GLSL(memoryBarrierBuffer();)
-        GLSL(barrier();)
-        GLSL(if (gl_LocalInvocationIndex == 0))
-            GLSL(atomicMax(frame_max[index], group_sum /
-                 (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
-
-        // Finally, have one thread per invocation update the total maximum
-        // and advance the index
-        GLSL(memoryBarrierBuffer();)
-        GLSL(barrier();)
-        GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
-            GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
-            GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
-            GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
-            GLSL(index = next;)
-        GLSL(})
-
-        GLSL(memoryBarrierBuffer();)
-        GLSL(barrier();)
-        GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
-              MP_REF_WHITE * PEAK_DETECT_FRAMES);
-    } else {
-        GLSLHF("const float sig_peak = %f;\n", ref_peak);
-    }
-
-    switch (algo) {
-    case TONE_MAPPING_CLIP:
-        GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
-        break;
-
-    case TONE_MAPPING_MOBIUS:
-        GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
-        // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
-        // where M(x) = scale * (x+a)/(x+b)
-        GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n");
-        GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / "
-              "max(1e-6, sig_peak - 1.0);\n");
-        GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n");
-        GLSL(sig = mix(sig, scale * (sig + a) / (sig + b), sig > j);)
-        break;
-
-    case TONE_MAPPING_REINHARD: {
-        float contrast = isnan(param) ? 0.5 : param,
-              offset = (1.0 - contrast) / contrast;
-        GLSLF("sig = sig / (sig + %f);\n", offset);
-        GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset);
-        GLSL(sig *= scale;)
-        break;
-    }
-
-    case TONE_MAPPING_HABLE: {
-        float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
-        GLSLHF("float hable(float x) {\n");
-        GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n",
-               A, C*B, D*E, A, B, D*F, E/F);
-        GLSLHF("}\n");
-        GLSL(sig = hable(sig) / hable(sig_peak);)
-        break;
-    }
-
-    case TONE_MAPPING_GAMMA: {
-        float gamma = isnan(param) ? 1.8 : param;
-        GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma);
-        GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;)
-        GLSL(sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig;)
-        break;
-    }
-
-    case TONE_MAPPING_LINEAR: {
-        float coeff = isnan(param) ? 1.0 : param;
-        GLSLF("sig = %f / sig_peak * sig;\n", coeff);
-        break;
-    }
-
-    default:
-        abort();
-    }
-
-    // Apply the computed scale factor to the color, linearly to prevent
-    // discoloration
-    GLSL(color.rgb *= sig / sig_orig;)
-}
-
-// Map colors from one source space to another. These source spaces must be
-// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
-// auto-guessing. If is_linear is true, we assume the input has already been
-// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will
-// detect the peak instead of relying on metadata. Note that this requires
-// the caller to have already bound the appropriate SSBO and set up the
-// compute shader metadata
-void pass_color_map(struct gl_shader_cache *sc,
-                    struct mp_colorspace src, struct mp_colorspace dst,
-                    enum tone_mapping algo, float tone_mapping_param,
-                    float tone_mapping_desat, bool detect_peak,
-                    bool gamut_warning, bool is_linear)
-{
-    GLSLF("// color mapping\n");
-
-    // Compute the highest encodable level
-    float src_range = mp_trc_nom_peak(src.gamma),
-          dst_range = mp_trc_nom_peak(dst.gamma);
-    // Source signal peak expressed relative to the destination's nominal
-    // peak; tone mapping below is only emitted when this ends up above 1
-    float ref_peak = src.sig_peak / dst_range;
-
-    // Some operations need access to the video's luma coefficients, so make
-    // them available. The row at index 1 of each RGB->XYZ matrix is what gets
-    // uploaded as the "src_luma" / "dst_luma" uniform.
-    float rgb2xyz[3][3];
-    mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz);
-    gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]);
-    mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz);
-    gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]);
-
-    // All operations from here on require linear light as a starting point,
-    // so we linearize even if src.gamma == dst.gamma when one of the other
-    // operations needs it
-    bool need_gamma = src.gamma != dst.gamma ||
-                      src.primaries != dst.primaries ||
-                      src_range != dst_range ||
-                      src.sig_peak > dst_range ||
-                      src.light != dst.light;
-
-    if (need_gamma && !is_linear) {
-        pass_linearize(sc, src.gamma);
-        // Remember that the color is now linear so that it gets converted
-        // to dst.gamma at the end of this function
-        is_linear= true;
-    }
-
-    // Only when the light types differ: emit the OOTF pass for the source
-    // light type, parameterized with the source's nominal peak
-    if (src.light != dst.light)
-        pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma));
-
-    // Rescale the signal to compensate for differences in the encoding range
-    // and reference white level. This is necessary because of how mpv encodes
-    // brightness in textures.
-    if (src_range != dst_range) {
-        GLSLF("// rescale value range;\n");
-        GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range);
-    }
-
-    // Adapt to the right colorspace if necessary
-    if (src.primaries != dst.primaries) {
-        struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries),
-                                csp_dst = mp_get_csp_primaries(dst.primaries);
-        float m[3][3] = {{0}};
-        mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m);
-        gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]);
-        GLSL(color.rgb = cms_matrix * color.rgb;)
-        // Since this can reduce the gamut, figure out by how much: ref_peak
-        // is raised to the largest diagonal entry of the matrix if that
-        // entry exceeds the current value
-        for (int c = 0; c < 3; c++)
-            ref_peak = MPMAX(ref_peak, m[c][c]);
-    }
-
-    // Tone map to prevent clipping when the source signal peak exceeds the
-    // encodable range or we've reduced the gamut
-    if (ref_peak > 1) {
-        // With detect_peak set, 0 is passed as the peak instead of ref_peak
-        // (presumably telling pass_tone_map to use the detected peak --
-        // confirm against pass_tone_map)
-        pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo,
-                      tone_mapping_param, tone_mapping_desat);
-    }
-
-    // Counterpart of the pass_ootf call above, using the destination's light
-    // type and nominal peak
-    if (src.light != dst.light)
-        pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma));
-
-    // Warn for remaining out-of-gamut colors if enabled: any pixel with a
-    // channel above 1.01 is replaced by its inverse
-    if (gamut_warning) {
-        GLSL(if (any(greaterThan(color.rgb, vec3(1.01)))))
-            GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert
-    }
-
-    // The color is linear here if the caller said so or if it was linearized
-    // above; in both cases convert it to the destination transfer function
-    if (is_linear)
-        pass_delinearize(sc, dst.gamma);
-}
-
-// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post.
-// Obtain random numbers by calling rand(h), followed by h = permute(h) to
-// update the state. Assumes the texture was hooked.
-//
-// Emits the GLSL helpers mod289(), permute() and rand() via GLSLH, declares
-// the state variable `h` in the shader code emitted via GLSL, and sets the
-// "random" uniform from the next value of `lfg`.
-static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
-{
-    GLSLH(float mod289(float x)  { return x - floor(x * 1.0/289.0) * 289.0; })
-    GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); })
-    GLSLH(float rand(float x)    { return fract(x * 1.0/41.0); })
-
-    // Initialize the PRNG by hashing the position + a random uniform
-    // (each component is offset by 1.0 before the three permute() rounds)
-    GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
-    GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
-    // The uniform is the generator output divided by UINT32_MAX; the
-    // division is done in double precision
-    gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX);
-}
-
-// User-tunable parameters of the debanding pass (see pass_sample_deband).
-struct deband_opts {
-    // Not read by pass_sample_deband and not exposed in deband_conf;
-    // presumably an on/off switch evaluated by the caller -- TODO confirm
-    int enabled;
-    // Number of averaging steps emitted into the shader
-    int iterations;
-    // Difference cut-off; step i uses threshold / (i * 16384.0)
-    float threshold;
-    // Sampling distance; step i samples within a distance of i * range
-    float range;
-    // Amount of noise added at the end; scaled by 1/8192 and by the
-    // nominal peak of the transfer function
-    float grain;
-};
-
-// Default debanding parameters, referenced by deband_conf.defaults.
-// `enabled` is not listed and is therefore zero-initialized.
-const struct deband_opts deband_opts_def = {
-    .iterations = 1,
-    .threshold = 64.0,
-    .range = 16.0,
-    .grain = 48.0,
-};
-
-// Option table for struct deband_opts. The numeric arguments of the range
-// macros are presumably (flags, min, max) -- TODO confirm against the option
-// macro definitions. There is no entry for the `enabled` field here.
-#define OPT_BASE_STRUCT struct deband_opts
-const struct m_sub_options deband_conf = {
-    .opts = (const m_option_t[]) {
-        OPT_INTRANGE("iterations", iterations, 0, 1, 16),
-        OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0),
-        OPT_FLOATRANGE("range", range, 0, 1.0, 64.0),
-        OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0),
-        {0} // all-zero terminator entry
-    },
-    .size = sizeof(struct deband_opts),
-    .defaults = &deband_opts_def,
-};
-
-// Stochastically sample a debanded result from a hooked texture.
-//
-// sc:   shader cache the GLSL code is appended to
-// opts: debanding parameters (iterations, threshold, range, grain)
-// lfg:  random generator used to seed the shader-side PRNG (see prng_init)
-// trc:  transfer function whose nominal peak scales the added noise
-void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
-                        AVLFG *lfg, enum mp_csp_trc trc)
-{
-    // Initialize the PRNG
-    // (the brace opened here is closed by the last GLSLF of this function)
-    GLSLF("{\n");
-    prng_init(sc, lfg);
-
-    // Helper: Compute a stochastic approximation of the avg color around a
-    // pixel
-    GLSLHF("vec4 average(float range, inout float h) {\n");
-        // Compute a random angle and distance
-        GLSLH(float dist = rand(h) * range;     h = permute(h);)
-        GLSLH(float dir  = rand(h) * 6.2831853; h = permute(h);)
-        GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));)
-
-        // Sample at quarter-turn intervals around the source pixel
-        GLSLH(vec4 ref[4];)
-        GLSLH(ref[0] = HOOKED_texOff(vec2( o.x,  o.y));)
-        GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y,  o.x));)
-        GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));)
-        GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));)
-
-        // Return the (normalized) average
-        GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;)
-    GLSLHF("}\n");
-
-    // Sample the source pixel
-    GLSL(color = HOOKED_tex(HOOKED_pos);)
-    GLSLF("vec4 avg, diff;\n");
-    // One average()/mix() pair is emitted per iteration; the sampling
-    // distance grows and the threshold shrinks with the iteration index
-    for (int i = 1; i <= opts->iterations; i++) {
-        // Sample the average pixel and use it instead of the original if
-        // the difference is below the given threshold
-        GLSLF("avg = average(%f, h);\n", i * opts->range);
-        GLSL(diff = abs(color - avg);)
-        // Per component: keep `color` where diff exceeds the threshold,
-        // otherwise take `avg`
-        GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n",
-              opts->threshold / (i * 16384.0));
-    }
-
-    // Add some random noise to smooth out residual differences
-    GLSL(vec3 noise;)
-    GLSL(noise.x = rand(h); h = permute(h);)
-    GLSL(noise.y = rand(h); h = permute(h);)
-    GLSL(noise.z = rand(h); h = permute(h);)
-
-    // Noise is scaled to the signal level to prevent extreme noise for HDR
-    float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc);
-    // Subtracting 0.5 from each noise component before scaling
-    GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain);
-    GLSLF("}\n");
-}
-
-// Assumes the texture was hooked
-void pass_sample_unsharp(struct gl_shader_cache *sc, float param) {
-    GLSLF("{\n");
-    GLSL(float st1 = 1.2;)
-    GLSL(vec4 p = HOOKED_tex(HOOKED_pos);)
-    GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1))
-                   + HOOKED_texOff(st1 * vec2(+1, -1))
-                   + HOOKED_texOff(st1 * vec2(-1, +1))
-                   + HOOKED_texOff(st1 * vec2(-1, -1));)
-    GLSL(float st2 = 1.5;)
-    GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1,  0))
-
author	Niklas Haas <git@haasn.xyz>	2017-09-14 08:04:55 +0200
committer	Niklas Haas <git@haasn.xyz>	2017-09-21 15:00:55 +0200
commit	65979986a923a8f08019b257c3fe72cd5e8ecf68 (patch)
tree	b8f4b8c17d583594aef0ca509064f8b2ff7128d4 /video/out/opengl/video_shaders.c
parent	20f958c9775652c3213588c2a0824f5353276adc (diff)
download	mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.bz2 mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.xz