diff options
Diffstat (limited to 'video/out/opengl/video_shaders.c')
-rw-r--r-- | video/out/opengl/video_shaders.c | 872 |
1 files changed, 0 insertions, 872 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c deleted file mode 100644 index 60c5ce82ac..0000000000 --- a/video/out/opengl/video_shaders.c +++ /dev/null @@ -1,872 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <math.h> - -#include "video_shaders.h" -#include "video.h" - -#define GLSL(x) gl_sc_add(sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) -#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); -#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) - -// Set up shared/commonly used variables and macros -void sampler_prelude(struct gl_shader_cache *sc, int tex_num) -{ - GLSLF("#undef tex\n"); - GLSLF("#undef texmap\n"); - GLSLF("#define tex texture%d\n", tex_num); - GLSLF("#define texmap texmap%d\n", tex_num); - GLSLF("vec2 pos = texcoord%d;\n", tex_num); - GLSLF("vec2 size = texture_size%d;\n", tex_num); - GLSLF("vec2 pt = pixel_size%d;\n", tex_num); -} - -static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, - struct scaler *scaler) -{ - gl_sc_uniform_texture(sc, "lut", scaler->lut); - GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size); - - int N = scaler->kernel->size; - int width = (N + 3) / 4; // round up - - GLSLF("float weights[%d];\n", N); - for (int i = 0; i < N; i++) { - if (i % 4 == 0) - GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width); - GLSLF("weights[%d] = c[%d];\n", i, i % 4); - } -} - -// Handle a single pass (either vertical or horizontal). The direction is given -// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is -// used instead (samples from texture0 through textureN) -void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, - int d_x, int d_y) -{ - int N = scaler->kernel->size; - bool use_ar = scaler->conf.antiring > 0; - bool planar = d_x == 0 && d_y == 0; - GLSL(color = vec4(0.0);) - GLSLF("{\n"); - if (!planar) { - GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y); - GLSL(pt *= dir;) - GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) - GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1); - } - GLSL(vec4 c;) - if (use_ar) { - GLSL(vec4 hi = vec4(0.0);) - GLSL(vec4 lo = vec4(1.0);) - } - pass_sample_separated_get_weights(sc, scaler); - GLSLF("// scaler samples\n"); - for (int n = 0; n < N; n++) { - if (planar) { - GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); - } else { - GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n); - } - GLSLF("color += vec4(weights[%d]) * c;\n", n); - if (use_ar && (n == N/2-1 || n == N/2)) { - GLSL(lo = min(lo, c);) - GLSL(hi = max(hi, c);) - } - } - if (use_ar) - GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", - scaler->conf.antiring); - GLSLF("}\n"); -} - -// Subroutine for computing and adding an individual texel contribution -// If subtexel < 0 and offset < 0, samples directly. -// If subtexel >= 0, takes the texel from cN[subtexel] -// If offset >= 0, takes the texel from inN[rel.y+y+offset][rel.x+x+offset] -static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, - int x, int y, int subtexel, int offset, int components) -{ - double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale; - double radius_cutoff = scaler->kernel->radius_cutoff; - - // Since we can't know the subpixel position in advance, assume a - // worst case scenario - int yy = y > 0 ? y-1 : y; - int xx = x > 0 ? x-1 : x; - double dmax = sqrt(xx*xx + yy*yy); - // Skip samples definitely outside the radius - if (dmax >= radius_cutoff) - return; - GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); - // Check for samples that might be skippable - bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2; - if (maybe_skippable) - GLSLF("if (d < %f) {\n", radius_cutoff); - - // get the weight for this pixel - if (scaler->lut->params.dimensions == 1) { - GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", - radius, scaler->lut_size); - } else { - GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", - radius, scaler->lut_size); - } - GLSL(wsum += w;) - - if (subtexel < 0 && offset < 0) { - GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); - GLSL(color += vec4(w) * c0;) - } else if (subtexel >= 0) { - for (int n = 0; n < components; n++) - GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel); - } else if (offset >= 0) { - for (int n = 0; n <components; n++) - GLSLF("color[%d] += w * in%d[rel.y+%d][rel.x+%d];\n", n, n, - y + offset, x + offset); - } else { - // invalid usage - abort(); - } - - if (maybe_skippable) - GLSLF("}\n"); -} - -void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int glsl_version) -{ - GLSL(color = vec4(0.0);) - GLSLF("{\n"); - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - GLSL(vec2 base = pos - fcoord * pt;) - GLSLF("float w, d, wsum = 0.0;\n"); - for (int n = 0; n < components; n++) - GLSLF("vec4 c%d;\n", n); - - gl_sc_uniform_texture(sc, "lut", scaler->lut); - - GLSLF("// scaler samples\n"); - int bound = ceil(scaler->kernel->radius_cutoff); - for (int y = 1-bound; y <= bound; y += 2) { - for (int x = 1-bound; x <= bound; x += 2) { - // First we figure out whether it's more efficient to use direct - // sampling or gathering. The problem is that gathering 4 texels - // only to discard some of them is very wasteful, so only do it if - // we suspect it will be a win rather than a loss. This is the case - // exactly when all four texels are within bounds - bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff; - - // textureGather is only supported in GLSL 400+ - if (glsl_version < 400) - use_gather = false; - - if (use_gather) { - // Gather the four surrounding texels simultaneously - for (int n = 0; n < components; n++) { - GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n", - n, x, y, n); - } - - // Mix in all of the points with their weights - for (int p = 0; p < 4; p++) { - // The four texels are gathered counterclockwise starting - // from the bottom left - static const int xo[4] = {0, 1, 1, 0}; - static const int yo[4] = {1, 1, 0, 0}; - if (x+xo[p] > bound || y+yo[p] > bound) - continue; - polar_sample(sc, scaler, x+xo[p], y+yo[p], p, -1, components); - } - } else { - // switch to direct sampling instead, for efficiency/compatibility - for (int yy = y; yy <= bound && yy <= y+1; yy++) { - for (int xx = x; xx <= bound && xx <= x+1; xx++) - polar_sample(sc, scaler, xx, yy, -1, -1, components); - } - } - } - } - - GLSL(color = color / vec4(wsum);) - GLSLF("}\n"); -} - -// bw/bh: block size -// iw/ih: input size (pre-calculated to fit all required texels) -void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int bw, int bh, int iw, int ih) -{ - int bound = ceil(scaler->kernel->radius_cutoff); - int offset = bound - 1; // padding top/left - - GLSL(color = vec4(0.0);) - GLSLF("{\n"); - GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);) - GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));) - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - GLSL(vec2 base = pos - pt * fcoord;) - GLSL(ivec2 rel = ivec2(round((base - wbase) * size));) - GLSLF("float w, d, wsum = 0.0;\n"); - gl_sc_uniform_texture(sc, "lut", scaler->lut); - - // Load all relevant texels into shmem - gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays"); - for (int c = 0; c < components; c++) - GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw); - - GLSL(vec4 c;) - GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh); - GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw); - GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset); - for (int c = 0; c < components; c++) - GLSLF("in%d[y][x] = c[%d];\n", c, c); - GLSLF("}}\n"); - GLSL(groupMemoryBarrier();) - GLSL(barrier();) - - // Dispatch the actual samples - GLSLF("// scaler samples\n"); - for (int y = 1-bound; y <= bound; y++) { - for (int x = 1-bound; x <= bound; x++) - polar_sample(sc, scaler, x, y, -1, offset, components); - } - - GLSL(color = color / vec4(wsum);) - GLSLF("}\n"); -} - -static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s) -{ - // Explanation of how bicubic scaling with only 4 texel fetches is done: - // http://www.mate.tue.nl/mate/pdfs/10318.pdf - // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' - // Explanation why this algorithm normally always blurs, even with unit - // scaling: - // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf - // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' - GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" - " + vec4(1, 0, -0.5, 0.5);\n", t, s); - GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); - GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); - GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); - GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s); -} - -void pass_sample_bicubic_fast(struct gl_shader_cache *sc) -{ - GLSLF("{\n"); - GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) - bicubic_calcweights(sc, "parmx", "fcoord.x"); - bicubic_calcweights(sc, "parmy", "fcoord.y"); - GLSL(vec4 cdelta;) - GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);) - GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);) - // first y-interpolation - GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) - GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) - GLSL(vec4 ab = mix(ag, ar, parmy.b);) - // second y-interpolation - GLSL(vec4 br = texture(tex, pos + cdelta.zy);) - GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) - GLSL(vec4 aa = mix(bg, br, parmy.b);) - // x-interpolation - GLSL(color = mix(aa, ab, parmx.b);) - GLSLF("}\n"); -} - -void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, - int w, int h) -{ - GLSLF("{\n"); - GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - // Determine the mixing coefficient vector - gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); - GLSL(vec2 coeff = fcoord * output_size/size;) - float threshold = scaler->conf.kernel.params[0]; - threshold = isnan(threshold) ? 0.0 : threshold; - GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold); - GLSL(coeff = clamp(coeff, 0.0, 1.0);) - // Compute the right blend of colors - GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) - GLSLF("}\n"); -} - -// Common constants for SMPTE ST.2084 (HDR) -static const float PQ_M1 = 2610./4096 * 1./4, - PQ_M2 = 2523./4096 * 128, - PQ_C1 = 3424./4096, - PQ_C2 = 2413./4096 * 32, - PQ_C3 = 2392./4096 * 32; - -// Common constants for ARIB STD-B67 (HLG) -static const float HLG_A = 0.17883277, - HLG_B = 0.28466892, - HLG_C = 0.55991073; - -// Common constants for Panasonic V-Log -static const float VLOG_B = 0.00873, - VLOG_C = 0.241514, - VLOG_D = 0.598206; - -// Common constants for Sony S-Log -static const float SLOG_A = 0.432699, - SLOG_B = 0.037584, - SLOG_C = 0.616596 + 0.03, - SLOG_P = 3.538813, - SLOG_Q = 0.030001, - SLOG_K2 = 155.0 / 219.0; - -// Linearize (expand), given a TRC as input. In essence, this is the ITU-R -// EOTF, calculated on an idealized (reference) monitor with a white point of -// MP_REF_WHITE and infinite contrast. -void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSLF("// linearize\n"); - - // Note that this clamp may technically violate the definition of - // ITU-R BT.2100, which allows for sub-blacks and super-whites to be - // displayed on the display where such would be possible. That said, the - // problem is that not all gamma curves are well-defined on the values - // outside this range, so we ignore it and just clip anyway for sanity. - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92), - pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), - lessThan(vec3(0.04045), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(2.4));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0), - pow(color.rgb, vec3(1.8)), - lessThan(vec3(0.03125), color.rgb));) - break; - case MP_CSP_TRC_PQ: - GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2); - GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" - " / (vec3(%f) - vec3(%f) * color.rgb);\n", - PQ_C1, PQ_C2, PQ_C3); - GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1); - // PQ's output range is 0-10000, but we need it to be relative to to - // MP_REF_WHITE instead, so rescale - GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); - break; - case MP_CSP_TRC_HLG: - GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" - " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" - " lessThan(vec3(0.5), color.rgb));\n", - HLG_C, HLG_A, HLG_B); - break; - case MP_CSP_TRC_V_LOG: - GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" - " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" - " - vec3(%f), \n" - " lessThanEqual(vec3(0.181), color.rgb)); \n", - VLOG_D, VLOG_C, VLOG_B); - break; - case MP_CSP_TRC_S_LOG1: - GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" - " - vec3(%f);\n", - SLOG_C, SLOG_A, SLOG_B); - break; - case MP_CSP_TRC_S_LOG2: - GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" - " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" - " - vec3(%f)) * vec3(1.0/%f), \n" - " lessThanEqual(vec3(%f), color.rgb)); \n", - SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); - break; - default: - abort(); - } - - // Rescale to prevent clipping on non-float textures - GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc)); -} - -// Delinearize (compress), given a TRC as output. This corresponds to the -// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a -// reference monitor. -void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSLF("// delinearize\n"); - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc)); - - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(12.92), - vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) - - vec3(0.055), - lessThanEqual(vec3(0.0031308), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(16.0), - pow(color.rgb, vec3(1.0/1.8)), - lessThanEqual(vec3(0.001953), color.rgb));) - break; - case MP_CSP_TRC_PQ: - GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); - GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); - GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" - " / (vec3(1.0) + vec3(%f) * color.rgb);\n", - PQ_C1, PQ_C2, PQ_C3); - GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2); - break; - case MP_CSP_TRC_HLG: - GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n" - " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n" - " lessThan(vec3(1.0), color.rgb));\n", - HLG_A, HLG_B, HLG_C); - break; - case MP_CSP_TRC_V_LOG: - GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" - " vec3(%f) * log(color.rgb + vec3(%f)) \n" - " + vec3(%f), \n" - " lessThanEqual(vec3(0.01), color.rgb)); \n", - VLOG_C / M_LN10, VLOG_B, VLOG_D); - break; - case MP_CSP_TRC_S_LOG1: - GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", - SLOG_A / M_LN10, SLOG_B, SLOG_C); - break; - case MP_CSP_TRC_S_LOG2: - GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" - " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" - " + vec3(%f), \n" - " lessThanEqual(vec3(0.0), color.rgb)); \n", - SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); - break; - default: - abort(); - } -} - -// Apply the OOTF mapping from a given light type to display-referred light. -// The extra peak parameter is used to scale the values before and after -// the OOTF, and can be inferred using mp_trc_nom_peak -void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) -{ - if (light == MP_CSP_LIGHT_DISPLAY) - return; - - GLSLF("// apply ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); - - switch (light) - { - case MP_CSP_LIGHT_SCENE_HLG: - // HLG OOTF from BT.2100, assuming a reference display with a - // peak of 1000 cd/m² -> gamma = 1.2 - GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), 0.2));\n", - (1000 / MP_REF_WHITE) / pow(12, 1.2)); - break; - case MP_CSP_LIGHT_SCENE_709_1886: - // This OOTF is defined by encoding the result as 709 and then decoding - // it as 1886; although this is called 709_1886 we actually use the - // more precise (by one decimal) values from BT.2020 instead - GLSL(color.rgb = mix(color.rgb * vec3(4.5), - vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), - lessThan(vec3(0.0181), color.rgb));) - GLSL(color.rgb = pow(color.rgb, vec3(2.4));) - break; - case MP_CSP_LIGHT_SCENE_1_2: - GLSL(color.rgb = pow(color.rgb, vec3(1.2));) - break; - default: - abort(); - } - - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); -} - -// Inverse of the function pass_ootf, for completeness' sake. -void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) -{ - if (light == MP_CSP_LIGHT_DISPLAY) - return; - - GLSLF("// apply inverse ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); - - switch (light) - { - case MP_CSP_LIGHT_SCENE_HLG: - GLSLF("color.rgb *= vec3(1.0/%f);\n", (1000 / MP_REF_WHITE) / pow(12, 1.2)); - GLSL(color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), 0.2/1.2)));) - break; - case MP_CSP_LIGHT_SCENE_709_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) - GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), - pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), - vec3(1/0.45)), - lessThan(vec3(0.08145), color.rgb));) - break; - case MP_CSP_LIGHT_SCENE_1_2: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) - break; - default: - abort(); - } - - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); -} - -// Tone map from a known peak brightness to the range [0,1]. If ref_peak -// is 0, we will use peak detection instead -static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, - enum tone_mapping algo, float param, float desat) -{ - GLSLF("// HDR tone mapping\n"); - - // Desaturate the color using a coefficient dependent on the luminance - GLSL(float luma = dot(dst_luma, color.rgb);) - if (desat > 0) { - GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); - GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) - } - - // To prevent discoloration due to out-of-bounds clipping, we need to make - // sure to reduce the value range as far as necessary to keep the entire - // signal in range, so tone map based on the brightest component. - GLSL(float sig = max(max(color.r, color.g), color.b);) - GLSL(float sig_orig = sig;) - - if (!ref_peak) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. - // We also want slightly more stable values, so use the group average - // instead of the group max - GLSLHF("shared uint group_sum = 0;\n"); - GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE); - - // Have one thread in each work group update the frame maximum - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0)) - GLSL(atomicMax(frame_max[index], group_sum / - (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) - - // Finally, have one thread per invocation update the total maximum - // and advance the index - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation - GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); - GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); - GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); - GLSL(index = next;) - GLSL(}) - - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n", - MP_REF_WHITE * PEAK_DETECT_FRAMES); - } else { - GLSLHF("const float sig_peak = %f;\n", ref_peak); - } - - switch (algo) { - case TONE_MAPPING_CLIP: - GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); - break; - - case TONE_MAPPING_MOBIUS: - GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); - // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 - // where M(x) = scale * (x+a)/(x+b) - GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n"); - GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " - "max(1e-6, sig_peak - 1.0);\n"); - GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); - GLSL(sig = mix(sig, scale * (sig + a) / (sig + b), sig > j);) - break; - - case TONE_MAPPING_REINHARD: { - float contrast = isnan(param) ? 0.5 : param, - offset = (1.0 - contrast) / contrast; - GLSLF("sig = sig / (sig + %f);\n", offset); - GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); - GLSL(sig *= scale;) - break; - } - - case TONE_MAPPING_HABLE: { - float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; - GLSLHF("float hable(float x) {\n"); - GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", - A, C*B, D*E, A, B, D*F, E/F); - GLSLHF("}\n"); - GLSL(sig = hable(sig) / hable(sig_peak);) - break; - } - - case TONE_MAPPING_GAMMA: { - float gamma = isnan(param) ? 1.8 : param; - GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma); - GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;) - GLSL(sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig;) - break; - } - - case TONE_MAPPING_LINEAR: { - float coeff = isnan(param) ? 1.0 : param; - GLSLF("sig = %f / sig_peak * sig;\n", coeff); - break; - } - - default: - abort(); - } - - // Apply the computed scale factor to the color, linearly to prevent - // discoloration - GLSL(color.rgb *= sig / sig_orig;) -} - -// Map colors from one source space to another. These source spaces must be -// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any -// auto-guessing. If is_linear is true, we assume the input has already been -// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will -// detect the peak instead of relying on metadata. Note that this requires -// the caller to have already bound the appropriate SSBO and set up the -// compute shader metadata -void pass_color_map(struct gl_shader_cache *sc, - struct mp_colorspace src, struct mp_colorspace dst, - enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool detect_peak, - bool gamut_warning, bool is_linear) -{ - GLSLF("// color mapping\n"); - - // Compute the highest encodable level - float src_range = mp_trc_nom_peak(src.gamma), - dst_range = mp_trc_nom_peak(dst.gamma); - float ref_peak = src.sig_peak / dst_range; - - // Some operations need access to the video's luma coefficients, so make - // them available - float rgb2xyz[3][3]; - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); - - // All operations from here on require linear light as a starting point, - // so we linearize even if src.gamma == dst.gamma when one of the other - // operations needs it - bool need_gamma = src.gamma != dst.gamma || - src.primaries != dst.primaries || - src_range != dst_range || - src.sig_peak > dst_range || - src.light != dst.light; - - if (need_gamma && !is_linear) { - pass_linearize(sc, src.gamma); - is_linear= true; - } - - if (src.light != dst.light) - pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma)); - - // Rescale the signal to compensate for differences in the encoding range - // and reference white level. This is necessary because of how mpv encodes - // brightness in textures. - if (src_range != dst_range) { - GLSLF("// rescale value range;\n"); - GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range); - } - - // Adapt to the right colorspace if necessary - if (src.primaries != dst.primaries) { - struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), - csp_dst = mp_get_csp_primaries(dst.primaries); - float m[3][3] = {{0}}; - mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); - gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); - GLSL(color.rgb = cms_matrix * color.rgb;) - // Since this can reduce the gamut, figure out by how much - for (int c = 0; c < 3; c++) - ref_peak = MPMAX(ref_peak, m[c][c]); - } - - // Tone map to prevent clipping when the source signal peak exceeds the - // encodable range or we've reduced the gamut - if (ref_peak > 1) { - pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo, - tone_mapping_param, tone_mapping_desat); - } - - if (src.light != dst.light) - pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma)); - - // Warn for remaining out-of-gamut colors is enabled - if (gamut_warning) { - GLSL(if (any(greaterThan(color.rgb, vec3(1.01))))) - GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert - } - - if (is_linear) - pass_delinearize(sc, dst.gamma); -} - -// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. -// Obtain random numbers by calling rand(h), followed by h = permute(h) to -// update the state. Assumes the texture was hooked. -static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) -{ - GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) - GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); }) - GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) - - // Initialize the PRNG by hashing the position + a random uniform - GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) - GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) - gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); -} - -struct deband_opts { - int enabled; - int iterations; - float threshold; - float range; - float grain; -}; - -const struct deband_opts deband_opts_def = { - .iterations = 1, - .threshold = 64.0, - .range = 16.0, - .grain = 48.0, -}; - -#define OPT_BASE_STRUCT struct deband_opts -const struct m_sub_options deband_conf = { - .opts = (const m_option_t[]) { - OPT_INTRANGE("iterations", iterations, 0, 1, 16), - OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0), - OPT_FLOATRANGE("range", range, 0, 1.0, 64.0), - OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0), - {0} - }, - .size = sizeof(struct deband_opts), - .defaults = &deband_opts_def, -}; - -// Stochastically sample a debanded result from a hooked texture. -void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - AVLFG *lfg, enum mp_csp_trc trc) -{ - // Initialize the PRNG - GLSLF("{\n"); - prng_init(sc, lfg); - - // Helper: Compute a stochastic approximation of the avg color around a - // pixel - GLSLHF("vec4 average(float range, inout float h) {\n"); - // Compute a random rangle and distance - GLSLH(float dist = rand(h) * range; h = permute(h);) - GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) - GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) - - // Sample at quarter-turn intervals around the source pixel - GLSLH(vec4 ref[4];) - GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) - GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) - GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) - GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) - - // Return the (normalized) average - GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) - GLSLHF("}\n"); - - // Sample the source pixel - GLSL(color = HOOKED_tex(HOOKED_pos);) - GLSLF("vec4 avg, diff;\n"); - for (int i = 1; i <= opts->iterations; i++) { - // Sample the average pixel and use it instead of the original if - // the difference is below the given threshold - GLSLF("avg = average(%f, h);\n", i * opts->range); - GLSL(diff = abs(color - avg);) - GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n", - opts->threshold / (i * 16384.0)); - } - - // Add some random noise to smooth out residual differences - GLSL(vec3 noise;) - GLSL(noise.x = rand(h); h = permute(h);) - GLSL(noise.y = rand(h); h = permute(h);) - GLSL(noise.z = rand(h); h = permute(h);) - - // Noise is scaled to the signal level to prevent extreme noise for HDR - float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc); - GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); - GLSLF("}\n"); -} - -// Assumes the texture was hooked -void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { - GLSLF("{\n"); - GLSL(float st1 = 1.2;) - GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) - GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) - + HOOKED_texOff(st1 * vec2(+1, -1)) - + HOOKED_texOff(st1 * vec2(-1, +1)) - + HOOKED_texOff(st1 * vec2(-1, -1));) - GLSL(float st2 = 1.5;) - GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) - + HOOKED_texOff(st2 * vec2( 0, +1)) - + HOOKED_texOff(st2 * vec2(-1, 0)) - + HOOKED_texOff(st2 * vec2( 0, -1));) - GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) - GLSLF("color = p + t * %f;\n", param); - GLSLF("}\n"); -} |