diff options
Diffstat (limited to 'video/out/opengl/video_shaders.c')
-rw-r--r-- | video/out/opengl/video_shaders.c | 78 |
1 files changed, 58 insertions, 20 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index 3381d532b6..a7ecf1a448 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -521,7 +521,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); } -// Tone map from a known peak brightness to the range [0,1] +// Tone map from a known peak brightness to the range [0,1]. If ref_peak +// is 0, we will use peak detection instead static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, enum tone_mapping algo, float param, float desat) { @@ -531,8 +532,42 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSL(float luma = dot(src_luma, color.rgb);) GLSL(float luma_orig = luma;) + if (!ref_peak) { + // For performance, we want to do as few atomic operations on global + // memory as possible, so use an atomic in shmem for the work group. + // We also want slightly more stable values, so use the group average + // instead of the group max + GLSLHF("shared uint group_sum = 0;\n"); + GLSLF("atomicAdd(group_sum, uint(luma * %f));\n", MP_REF_WHITE); + + // Have one thread in each work group update the frame maximum + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0)) + GLSL(atomicMax(frame_max[index], group_sum / + (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) + + // Finally, have one thread per invocation update the total maximum + // and advance the index + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation + GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); + GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); + GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); + GLSL(index = next;) + GLSL(}) + + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSLF("const float sig_peak = 1.0/%f * float(sig_peak_raw);\n", + MP_REF_WHITE * PEAK_DETECT_FRAMES); + } else { + GLSLHF("const float sig_peak = %f;\n", ref_peak); + } + // Desaturate the color using a coefficient dependent on the brightness - if (desat > 0 && ref_peak > desat) { + if (desat > 0) { GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) } @@ -542,23 +577,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLF("luma = clamp(%f * luma, 0.0, 1.0);\n", isnan(param) ? 1.0 : param); break; - case TONE_MAPPING_MOBIUS: { - float j = isnan(param) ? 0.3 : param; - // solve for M(j) = j; M(ref_peak) = 1.0; M'(j) = 1.0 + case TONE_MAPPING_MOBIUS: + GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); + // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) - float a = -j*j * (ref_peak - 1) / (j*j - 2*j + ref_peak), - b = (j*j - 2*j*ref_peak + ref_peak) / (ref_peak - 1); - - GLSLF("luma = mix(%f * (luma + %f) / (luma + %f), luma, luma <= %f);\n", - (b*b + 2*b*j + j*j) / (b - a), a, b, j); + GLSLF("const float a = -j*j * (sig_peak - 1) / (j*j - 2*j + sig_peak);\n"); + GLSLF("const float b = (j*j - 2*j*sig_peak + sig_peak) / " + "max(1e-6, sig_peak - 1);\n"); + GLSLF("const float scale = (b*b + 2*b*j + j*j) / (b-a);\n"); + GLSL(luma = mix(luma, scale * (luma + a) / (luma + b), luma > j);) break; - } case TONE_MAPPING_REINHARD: { float contrast = isnan(param) ? 0.5 : param, offset = (1.0 - contrast) / contrast; GLSLF("luma = luma / (luma + %f);\n", offset); - GLSLF("luma *= %f;\n", (ref_peak + offset) / ref_peak); + GLSLF("const float lumascale = (sig_peak + %f) / sig_peak;\n", offset); + GLSL(luma *= lumascale;) break; } @@ -568,20 +603,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", A, C*B, D*E, A, B, D*F, E/F); GLSLHF("}\n"); - - GLSLF("luma = hable(luma) / hable(%f);\n", ref_peak); + GLSL(luma = hable(luma) / hable(sig_peak);) break; } case TONE_MAPPING_GAMMA: { float gamma = isnan(param) ? 1.8 : param; - GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma); + GLSLF("luma = pow(luma / sig_peak, %f);\n", 1.0/gamma); break; } case TONE_MAPPING_LINEAR: { float coeff = isnan(param) ? 1.0 : param; - GLSLF("luma = %f * luma;\n", coeff / ref_peak); + GLSLF("luma = %f / sig_peak * luma;\n", coeff); break; } @@ -596,11 +630,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // Map colors from one source space to another. These source spaces must be // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any // auto-guessing. If is_linear is true, we assume the input has already been -// linearized (e.g. for linear-scaling) +// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will +// detect the peak instead of relying on metadata. Note that this requires +// the caller to have already bound the appropriate SSBO and set up the +// compute shader metadata void pass_color_map(struct gl_shader_cache *sc, struct mp_colorspace src, struct mp_colorspace dst, enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool is_linear) + float tone_mapping_desat, bool detect_peak, + bool is_linear) { GLSLF("// color mapping\n"); @@ -643,8 +681,8 @@ void pass_color_map(struct gl_shader_cache *sc, // Tone map to prevent clipping when the source signal peak exceeds the // encodable range if (src.sig_peak > dst_range) { - pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param, - tone_mapping_desat); + float ref_peak = detect_peak ? 0 : src.sig_peak / dst_range; + pass_tone_map(sc, ref_peak, algo, tone_mapping_param, tone_mapping_desat); } // Adapt to the right colorspace if necessary |