diff options
Diffstat (limited to 'video/out/gpu/video_shaders.c')
-rw-r--r-- | video/out/gpu/video_shaders.c | 382 |
1 files changed, 227 insertions, 155 deletions
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 51b62ad7db..e202818501 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -17,6 +17,8 @@ #include <math.h> +#include <libplacebo/colorspace.h> + #include "video_shaders.h" #include "video.h" @@ -41,7 +43,7 @@ static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, struct scaler *scaler) { gl_sc_uniform_texture(sc, "lut", scaler->lut); - GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size); + GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut->params.h); int N = scaler->kernel->size; int width = (N + 3) / 4; // round up @@ -103,7 +105,7 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, int x, int y, int components, bool planar) { - double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale; + double radius = scaler->kernel->radius * scaler->kernel->filter_scale; double radius_cutoff = scaler->kernel->radius_cutoff; // Since we can't know the subpixel position in advance, assume a @@ -123,10 +125,10 @@ static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, // get the weight for this pixel if (scaler->lut->params.dimensions == 1) { GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", - radius, scaler->lut_size); + radius, scaler->lut->params.w); } else { GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", - radius, scaler->lut_size); + radius, scaler->lut->params.h); } GLSL(wsum += w;) @@ -252,7 +254,7 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s) { // Explanation of how bicubic scaling with only 4 texel fetches is done: - // http://www.mate.tue.nl/mate/pdfs/10318.pdf + // <https://web.archive.org/web/20180720154854/http://www.mate.tue.nl/mate/pdfs/10318.pdf> // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' // Explanation why this algorithm normally always blurs, even with unit // scaling: @@ -337,10 +339,10 @@ static const float SLOG_A = 0.432699, // // These functions always output to a normalized scale of [0,1], for // convenience of the video.c code that calls it. To get the values in an -// absolute scale, multiply the result by `mp_trc_nom_peak(trc)` -void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +// absolute scale, multiply the result by `pl_color_transfer_nominal_peak(trc)` +void pass_linearize(struct gl_shader_cache *sc, enum pl_color_transfer trc) { - if (trc == MP_CSP_TRC_LINEAR) + if (trc == PL_COLOR_TRC_LINEAR) return; GLSLF("// linearize\n"); @@ -353,78 +355,84 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92), - pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), - lessThan(vec3(0.04045), color.rgb));) + case PL_COLOR_TRC_SRGB: + GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n" + " pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n" + " %s(lessThan(vec3(0.04045), color.rgb))); \n", + gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_BT_1886: + case PL_COLOR_TRC_BT_1886: GLSL(color.rgb = pow(color.rgb, vec3(2.4));) break; - case MP_CSP_TRC_GAMMA18: + case PL_COLOR_TRC_GAMMA18: GLSL(color.rgb = pow(color.rgb, vec3(1.8));) break; - case MP_CSP_TRC_GAMMA20: + case PL_COLOR_TRC_GAMMA20: GLSL(color.rgb = pow(color.rgb, vec3(2.0));) break; - case MP_CSP_TRC_GAMMA22: + case PL_COLOR_TRC_GAMMA22: GLSL(color.rgb = pow(color.rgb, vec3(2.2));) break; - case MP_CSP_TRC_GAMMA24: + case PL_COLOR_TRC_GAMMA24: GLSL(color.rgb = pow(color.rgb, vec3(2.4));) break; - case MP_CSP_TRC_GAMMA26: + case PL_COLOR_TRC_GAMMA26: GLSL(color.rgb = pow(color.rgb, vec3(2.6));) break; - case MP_CSP_TRC_GAMMA28: + case PL_COLOR_TRC_GAMMA28: GLSL(color.rgb = pow(color.rgb, vec3(2.8));) break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0), - pow(color.rgb, vec3(1.8)), - lessThan(vec3(0.03125), color.rgb));) + case PL_COLOR_TRC_PRO_PHOTO: + GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n" + " pow(color.rgb, vec3(1.8)), \n" + " %s(lessThan(vec3(0.03125), color.rgb))); \n", + gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_PQ: + case PL_COLOR_TRC_PQ: GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2); GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" " / (vec3(%f) - vec3(%f) * color.rgb);\n", PQ_C1, PQ_C2, PQ_C3); GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1); - // PQ's output range is 0-10000, but we need it to be relative to to + // PQ's output range is 0-10000, but we need it to be relative to // MP_REF_WHITE instead, so rescale GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); break; - case MP_CSP_TRC_HLG: + case PL_COLOR_TRC_HLG: GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" - " lessThan(vec3(0.5), color.rgb));\n", - HLG_C, HLG_A, HLG_B); + " %s(lessThan(vec3(0.5), color.rgb)));\n", + HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG); break; - case MP_CSP_TRC_V_LOG: + case PL_COLOR_TRC_V_LOG: GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f), \n" - " lessThanEqual(vec3(0.181), color.rgb)); \n", - VLOG_D, VLOG_C, VLOG_B); + " %s(lessThanEqual(vec3(0.181), color.rgb))); \n", + VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_S_LOG1: + case PL_COLOR_TRC_S_LOG1: GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" " - vec3(%f);\n", SLOG_C, SLOG_A, SLOG_B); break; - case MP_CSP_TRC_S_LOG2: + case PL_COLOR_TRC_S_LOG2: GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f)) * vec3(1.0/%f), \n" - " lessThanEqual(vec3(%f), color.rgb)); \n", - SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); + " %s(lessThanEqual(vec3(%f), color.rgb))); \n", + SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q); + break; + case PL_COLOR_TRC_ST428: + GLSL(color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6));); break; default: abort(); } // Rescale to prevent clipping on non-float textures - GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", pl_color_transfer_nominal_peak(trc)); } // Delinearize (compress), given a TRC as output. This corresponds to the @@ -432,49 +440,51 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) // reference monitor. // // Like pass_linearize, this functions ingests values on an normalized scale -void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +void pass_delinearize(struct gl_shader_cache *sc, enum pl_color_transfer trc) { - if (trc == MP_CSP_TRC_LINEAR) + if (trc == PL_COLOR_TRC_LINEAR) return; GLSLF("// delinearize\n"); GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc)); + GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(trc)); switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(12.92), - vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) - - vec3(0.055), - lessThanEqual(vec3(0.0031308), color.rgb));) + case PL_COLOR_TRC_SRGB: + GLSLF("color.rgb = mix(color.rgb * vec3(12.92), \n" + " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n" + " - vec3(0.055), \n" + " %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n", + gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_BT_1886: + case PL_COLOR_TRC_BT_1886: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) break; - case MP_CSP_TRC_GAMMA18: + case PL_COLOR_TRC_GAMMA18: GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) break; - case MP_CSP_TRC_GAMMA20: + case PL_COLOR_TRC_GAMMA20: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.0));) break; - case MP_CSP_TRC_GAMMA22: + case PL_COLOR_TRC_GAMMA22: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) break; - case MP_CSP_TRC_GAMMA24: + case PL_COLOR_TRC_GAMMA24: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) break; - case MP_CSP_TRC_GAMMA26: + case PL_COLOR_TRC_GAMMA26: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.6));) break; - case MP_CSP_TRC_GAMMA28: + case PL_COLOR_TRC_GAMMA28: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(16.0), - pow(color.rgb, vec3(1.0/1.8)), - lessThanEqual(vec3(0.001953), color.rgb));) + case PL_COLOR_TRC_PRO_PHOTO: + GLSLF("color.rgb = mix(color.rgb * vec3(16.0), \n" + " pow(color.rgb, vec3(1.0/1.8)), \n" + " %s(lessThanEqual(vec3(0.001953), color.rgb))); \n", + gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_PQ: + case PL_COLOR_TRC_PQ: GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" @@ -482,29 +492,33 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) PQ_C1, PQ_C2, PQ_C3); GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2); break; - case MP_CSP_TRC_HLG: + case PL_COLOR_TRC_HLG: + GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG); GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n" - " lessThan(vec3(1.0), color.rgb));\n", - HLG_A, HLG_B, HLG_C); + " %s(lessThan(vec3(1.0), color.rgb)));\n", + HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_V_LOG: + case PL_COLOR_TRC_V_LOG: GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" " vec3(%f) * log(color.rgb + vec3(%f)) \n" " + vec3(%f), \n" - " lessThanEqual(vec3(0.01), color.rgb)); \n", - VLOG_C / M_LN10, VLOG_B, VLOG_D); + " %s(lessThanEqual(vec3(0.01), color.rgb))); \n", + VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3)); break; - case MP_CSP_TRC_S_LOG1: + case PL_COLOR_TRC_S_LOG1: GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", SLOG_A / M_LN10, SLOG_B, SLOG_C); break; - case MP_CSP_TRC_S_LOG2: + case PL_COLOR_TRC_S_LOG2: GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" " + vec3(%f), \n" - " lessThanEqual(vec3(0.0), color.rgb)); \n", - SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); + " %s(lessThanEqual(vec3(0.0), color.rgb))); \n", + SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3)); + break; + case PL_COLOR_TRC_ST428: + GLSL(color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6));); break; default: abort(); @@ -528,16 +542,17 @@ static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, // HLG OOTF from BT.2100, scaled to the chosen display peak float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n", - peak / pow(12, gamma), gamma - 1.0); + peak / pow(12.0 / MP_REF_WHITE_HLG, gamma), gamma - 1.0); break; } case MP_CSP_LIGHT_SCENE_709_1886: // This OOTF is defined by encoding the result as 709 and then decoding // it as 1886; although this is called 709_1886 we actually use the // more precise (by one decimal) values from BT.2020 instead - GLSL(color.rgb = mix(color.rgb * vec3(4.5), - vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), - lessThan(vec3(0.0181), color.rgb));) + GLSLF("color.rgb = mix(color.rgb * vec3(4.5), \n" + " vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n" + " %s(lessThan(vec3(0.0181), color.rgb))); \n", + gl_sc_bvec(sc, 3)); GLSL(color.rgb = pow(color.rgb, vec3(2.4));) break; case MP_CSP_LIGHT_SCENE_1_2: @@ -561,17 +576,18 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh { case MP_CSP_LIGHT_SCENE_HLG: { float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12, gamma)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12.0 / MP_REF_WHITE_HLG, gamma)); GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), %f)));\n", (gamma - 1.0) / gamma); break; } case MP_CSP_LIGHT_SCENE_709_1886: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) - GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), - pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), - vec3(1/0.45)), - lessThan(vec3(0.08145), color.rgb));) + GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" + " pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n" + " vec3(1/0.45)), \n" + " %s(lessThan(vec3(0.08145), color.rgb))); \n", + gl_sc_bvec(sc, 3)); break; case MP_CSP_LIGHT_SCENE_1_2: GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) @@ -601,7 +617,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // pixel using shared memory first GLSLH(shared int wg_sum;) GLSLH(shared uint wg_max;) - GLSL(wg_sum = 0; wg_max = 0;) + GLSL(wg_sum = 0; wg_max = 0u;) GLSL(barrier();) GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); @@ -610,7 +626,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // Have one thread per work group update the global atomics GLSL(memoryBarrierShared();) GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0) {) + GLSL(if (gl_LocalInvocationIndex == 0u) {) GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);) GLSL( atomicAdd(frame_sum, wg_avg);) GLSL( atomicMax(frame_max, wg_max);) @@ -620,8 +636,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // Finally, to update the global state, we increment a counter per dispatch GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) - GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) - GLSL( counter = 0;) + GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {) + GLSL( counter = 0u;) GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); GLSL( cur.x = exp(cur.x);) @@ -630,9 +646,12 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // Use an IIR low-pass filter to smooth out the detected values, with a // configurable decay rate based on the desired time constant (tau) - float a = 1.0 - cos(1.0 / opts->decay_rate); - float decay = sqrt(a*a + 2*a) - a; - GLSLF(" average += %f * (cur - average);\n", decay); + if (opts->decay_rate) { + float decay = 1.0f - expf(-1.0f / opts->decay_rate); + GLSLF(" average += %f * (cur - average);\n", decay); + } else { + GLSLF(" average = cur;\n"); + } // Scene change hysteresis float log_db = 10.0 / log(10.0); @@ -642,11 +661,20 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL( average = mix(average, cur, weight);) // Reset SSBO state for the next frame - GLSL( frame_sum = 0; frame_max = 0;) + GLSL( frame_sum = 0; frame_max = 0u;) GLSL( memoryBarrierBuffer();) GLSL(}) } +static inline float pq_delinearize(float x) +{ + x *= MP_REF_WHITE / 10000.0; + x = powf(x, PQ_M1); + x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x); + x = pow(x, PQ_M2); + return x; +} + // Tone map from a known peak brightness to the range [0,1]. If ref_peak // is 0, we will use peak detection instead static void pass_tone_map(struct gl_shader_cache *sc, @@ -668,14 +696,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, if (opts->compute_peak >= 0) hdr_update_peak(sc, opts); - GLSLF("vec3 sig = color.rgb;\n"); + // Always hard-clip the upper bound of the signal range to avoid functions + // exploding on inputs greater than 1.0 + GLSLF("vec3 sig = min(color.rgb, sig_peak);\n"); + + // This function always operates on an absolute scale, so ignore the + // dst_peak normalization for it + float dst_scale = dst_peak; + enum tone_mapping curve = opts->curve ? opts->curve : TONE_MAPPING_BT_2390; + if (curve == TONE_MAPPING_BT_2390) + dst_scale = 1.0; // Rescale the variables in order to bring it into a representation where // 1.0 represents the dst_peak. This is because all of the tone mapping // algorithms are defined in such a way that they map to the range [0.0, 1.0]. - if (dst_peak > 1.0) { - GLSLF("sig *= 1.0/%f;\n", dst_peak); - GLSLF("sig_peak *= 1.0/%f;\n", dst_peak); + if (dst_scale > 1.0) { + GLSLF("sig *= 1.0/%f;\n", dst_scale); + GLSLF("sig_peak *= 1.0/%f;\n", dst_scale); } GLSL(float sig_orig = sig[sig_idx];) @@ -684,9 +721,9 @@ static void pass_tone_map(struct gl_shader_cache *sc, GLSL(sig_peak *= slope;) float param = opts->curve_param; - switch (opts->curve) { + switch (curve) { case TONE_MAPPING_CLIP: - GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); + GLSLF("sig = min(%f * sig, 1.0);\n", isnan(param) ? 1.0 : param); break; case TONE_MAPPING_MOBIUS: @@ -699,7 +736,8 @@ static void pass_tone_map(struct gl_shader_cache *sc, "max(1e-6, sig_peak - 1.0);\n"); GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b))," - " greaterThan(sig, vec3(j)));\n"); + " %s(greaterThan(sig, vec3(j))));\n", + gl_sc_bvec(sc, 3)); GLSLF("}\n"); break; @@ -732,34 +770,62 @@ static void pass_tone_map(struct gl_shader_cache *sc, GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;) GLSLF("sig = mix(scale * sig," " pow(sig / sig_peak, vec3(gamma))," - " greaterThan(sig, vec3(cutoff)));\n"); + " %s(greaterThan(sig, vec3(cutoff))));\n", + gl_sc_bvec(sc, 3)); break; } case TONE_MAPPING_LINEAR: { float coeff = isnan(param) ? 1.0 : param; - GLSLF("sig = %f / sig_peak * sig;\n", coeff); + GLSLF("sig = min(%f / sig_peak, 1.0) * sig;\n", coeff); break; } + case TONE_MAPPING_BT_2390: + // We first need to encode both sig and sig_peak into PQ space + GLSLF("vec4 sig_pq = vec4(sig.rgb, sig_peak); \n" + "sig_pq *= vec4(1.0/%f); \n" + "sig_pq = pow(sig_pq, vec4(%f)); \n" + "sig_pq = (vec4(%f) + vec4(%f) * sig_pq) \n" + " / (vec4(1.0) + vec4(%f) * sig_pq); \n" + "sig_pq = pow(sig_pq, vec4(%f)); \n", + 10000.0 / MP_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); + // Encode both the signal and the target brightness to be relative to + // the source peak brightness, and figure out the target peak in this space + GLSLF("float scale = 1.0 / sig_pq.a; \n" + "sig_pq.rgb *= vec3(scale); \n" + "float maxLum = %f * scale; \n", + pq_delinearize(dst_peak)); + // Apply piece-wise hermite spline + GLSLF("float ks = 1.5 * maxLum - 0.5; \n" + "vec3 tb = (sig_pq.rgb - vec3(ks)) / vec3(1.0 - ks); \n" + "vec3 tb2 = tb * tb; \n" + "vec3 tb3 = tb2 * tb; \n" + "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) + \n" + " (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) + \n" + " (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum); \n" + "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks)))); \n", + gl_sc_bvec(sc, 3)); + // Convert back from PQ space to linear light + GLSLF("sig *= vec3(sig_pq.a); \n" + "sig = pow(sig, vec3(1.0/%f)); \n" + "sig = max(sig - vec3(%f), 0.0) / \n" + " (vec3(%f) - vec3(%f) * sig); \n" + "sig = pow(sig, vec3(1.0/%f)); \n" + "sig *= vec3(%f); \n", + PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / MP_REF_WHITE); + break; + default: abort(); } - GLSL(sig = min(sig, vec3(1.0));) - GLSL(vec3 sig_lin = color.rgb * (sig[sig_idx] / sig_orig);) - - // Mix between the per-channel tone mapped and the linear tone mapped - // signal based on the desaturation strength - if (opts->desat > 0) { - float base = 0.18 * dst_peak; - GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / " - " max(sig[sig_idx], 1.0);\n", base); - GLSLF("coeff = %f * pow(coeff, %f);\n", opts->desat, opts->desat_exp); - GLSLF("color.rgb = mix(sig_lin, %f * sig, coeff);\n", dst_peak); - } else { - GLSL(color.rgb = sig_lin;) - } + GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / \n" + " max(sig[sig_idx], 1.0); \n" + "coeff = %f * pow(coeff / %f, %f); \n" + "color.rgb *= sig[sig_idx] / sig_orig; \n" + "color.rgb = mix(color.rgb, %f * sig, coeff); \n", + 0.18 / dst_scale, 0.90, dst_scale, 0.20, dst_scale); } // Map colors from one source space to another. These source spaces must be @@ -770,78 +836,92 @@ static void pass_tone_map(struct gl_shader_cache *sc, // the caller to have already bound the appropriate SSBO and set up the compute // shader metadata void pass_color_map(struct gl_shader_cache *sc, bool is_linear, - struct mp_colorspace src, struct mp_colorspace dst, + struct pl_color_space src, struct pl_color_space dst, + enum mp_csp_light src_light, enum mp_csp_light dst_light, const struct gl_tone_map_opts *opts) { GLSLF("// color mapping\n"); // Some operations need access to the video's luma coefficients, so make // them available - float rgb2xyz[3][3]; - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); - - bool need_ootf = src.light != dst.light; - if (src.light == MP_CSP_LIGHT_SCENE_HLG && src.sig_peak != dst.sig_peak) + pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(src.primaries)); + gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz.m[1]); + rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(dst.primaries)); + gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz.m[1]); + + bool need_ootf = src_light != dst_light; + if (src_light == MP_CSP_LIGHT_SCENE_HLG && src.hdr.max_luma != dst.hdr.max_luma) need_ootf = true; // All operations from here on require linear light as a starting point, - // so we linearize even if src.gamma == dst.gamma when one of the other + // so we linearize even if src.gamma == dst.transfer when one of the other // operations needs it - bool need_linear = src.gamma != dst.gamma || + bool need_linear = src.transfer != dst.transfer || src.primaries != dst.primaries || - src.sig_peak != dst.sig_peak || + src.hdr.max_luma != dst.hdr.max_luma || need_ootf; if (need_linear && !is_linear) { // We also pull it up so that 1.0 is the reference white - pass_linearize(sc, src.gamma); + pass_linearize(sc, src.transfer); is_linear = true; } // Pre-scale the incoming values into an absolute scale - GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(src.gamma)); + GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(src.transfer)); if (need_ootf) - pass_ootf(sc, src.light, src.sig_peak); + pass_ootf(sc, src_light, src.hdr.max_luma / MP_REF_WHITE); // Tone map to prevent clipping due to excessive brightness - if (src.sig_peak > dst.sig_peak) - pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts); + if (src.hdr.max_luma > dst.hdr.max_luma) { + pass_tone_map(sc, src.hdr.max_luma / MP_REF_WHITE, + dst.hdr.max_luma / MP_REF_WHITE, opts); + } // Adapt to the right colorspace if necessary if (src.primaries != dst.primaries) { - struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), - csp_dst = mp_get_csp_primaries(dst.primaries); - float m[3][3] = {{0}}; - mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); - gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); + const struct pl_raw_primaries *csp_src = pl_raw_primaries_get(src.primaries), + *csp_dst = pl_raw_primaries_get(dst.primaries); + pl_matrix3x3 m = pl_get_color_mapping_matrix(csp_src, csp_dst, + PL_INTENT_RELATIVE_COLORIMETRIC); + gl_sc_uniform_mat3(sc, "cms_matrix", true, &m.m[0][0]); GLSL(color.rgb = cms_matrix * color.rgb;) + + if (!opts->gamut_mode || opts->gamut_mode == GAMUT_DESATURATE) { + GLSL(float cmin = min(min(color.r, color.g), color.b);) + GLSL(if (cmin < 0.0) { + float luma = dot(dst_luma, color.rgb); + float coeff = cmin / (cmin - luma); + color.rgb = mix(color.rgb, vec3(luma), coeff); + }) + GLSLF("float cmax = 1.0/%f * max(max(color.r, color.g), color.b);\n", + dst.hdr.max_luma / MP_REF_WHITE); + GLSL(if (cmax > 1.0) color.rgb /= cmax;) + } } if (need_ootf) - pass_inverse_ootf(sc, dst.light, dst.sig_peak); + pass_inverse_ootf(sc, dst_light, dst.hdr.max_luma / MP_REF_WHITE); // Post-scale the outgoing values from absolute scale to normalized. // For SDR, we normalize to the chosen signal peak. For HDR, we normalize // to the encoding range of the transfer function. - float dst_range = dst.sig_peak; - if (mp_trc_is_hdr(dst.gamma)) - dst_range = mp_trc_nom_peak(dst.gamma); + float dst_range = dst.hdr.max_luma / MP_REF_WHITE; + if (pl_color_space_is_hdr(&dst)) + dst_range = pl_color_transfer_nominal_peak(dst.transfer); GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); - // Warn for remaining out-of-gamut colors is enabled - if (opts->gamut_warning) { - GLSL(if (any(greaterThan(color.rgb, vec3(1.01))) || - any(lessThan(color.rgb, vec3(0.0))))) + // Warn for remaining out-of-gamut colors if enabled + if (opts->gamut_mode == GAMUT_WARN) { + GLSL(if (any(greaterThan(color.rgb, vec3(1.005))) || + any(lessThan(color.rgb, vec3(-0.005))))) GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert } if (is_linear) - pass_delinearize(sc, dst.gamma); + pass_delinearize(sc, dst.transfer); } // Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. @@ -864,28 +944,20 @@ static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); } -struct deband_opts { - int enabled; - int iterations; - float threshold; - float range; - float grain; -}; - const struct deband_opts deband_opts_def = { .iterations = 1, - .threshold = 64.0, + .threshold = 48.0, .range = 16.0, - .grain = 48.0, + .grain = 32.0, }; #define OPT_BASE_STRUCT struct deband_opts const struct m_sub_options deband_conf = { .opts = (const m_option_t[]) { - OPT_INTRANGE("iterations", iterations, 0, 1, 16), - OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0), - OPT_FLOATRANGE("range", range, 0, 1.0, 64.0), - OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0), + {"iterations", OPT_INT(iterations), M_RANGE(0, 16)}, + {"threshold", OPT_FLOAT(threshold), M_RANGE(0.0, 4096.0)}, + {"range", OPT_FLOAT(range), M_RANGE(1.0, 64.0)}, + {"grain", OPT_FLOAT(grain), M_RANGE(0.0, 4096.0)}, {0} }, .size = sizeof(struct deband_opts), @@ -894,7 +966,7 @@ const struct m_sub_options deband_conf = { // Stochastically sample a debanded result from a hooked texture. void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - AVLFG *lfg, enum mp_csp_trc trc) + AVLFG *lfg, enum pl_color_transfer trc) { // Initialize the PRNG GLSLF("{\n"); @@ -927,8 +999,8 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, // the difference is below the given threshold GLSLF("avg = average(%f, h);\n", i * opts->range); GLSL(diff = abs(color - avg);) - GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n", - opts->threshold / (i * 16384.0)); + GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n", + gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0)); } // Add some random noise to smooth out residual differences @@ -938,7 +1010,7 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, GLSL(noise.z = rand(h); h = permute(h);) // Noise is scaled to the signal level to prevent extreme noise for HDR - float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc); + float gain = opts->grain/8192.0 / pl_color_transfer_nominal_peak(trc); GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); GLSLF("}\n"); } |