summaryrefslogtreecommitdiffstats
path: root/video/out/opengl/video_shaders.c
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-07-17 21:39:06 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-24 17:19:31 +0200
commitb196cadf9f9f6ea210db9236c2b26523a9a2719f (patch)
tree7faa5a77c65d84e45c074eb248fe0b54a62288ad /video/out/opengl/video_shaders.c
parentaad6ba018a17eded2b3f4af2212e0123cfb29b79 (diff)
downloadmpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.bz2
mpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.xz
vo_opengl: support HDR peak detection
This is done via compute shaders. As a consequence, the tone mapping algorithms had to be rewritten to compute their known constants in GLSL (ahead of time), instead of computing them once on the CPU when generating the shader. This didn't affect performance. Using shmem/SSBO atomics in this way is extremely fast on nvidia, but it might be slow on other platforms. Needs testing. Unfortunately, setting up the SSBO still requires OpenGL calls, which means I can't have it in video_shaders.c, where it belongs. But I'll defer worrying about that until the backend refactor, since then I'll be breaking up the video/video_shaders structure anyway.
Diffstat (limited to 'video/out/opengl/video_shaders.c')
-rw-r--r--video/out/opengl/video_shaders.c78
1 files changed, 58 insertions, 20 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 3381d532b6..a7ecf1a448 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -521,7 +521,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
}
-// Tone map from a known peak brightness to the range [0,1]
+// Tone map from a known peak brightness to the range [0,1]. If ref_peak
+// is 0, we will use peak detection instead
static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
enum tone_mapping algo, float param, float desat)
{
@@ -531,8 +532,42 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSL(float luma = dot(src_luma, color.rgb);)
GLSL(float luma_orig = luma;)
+ if (!ref_peak) {
+ // For performance, we want to do as few atomic operations on global
+ // memory as possible, so use an atomic in shmem for the work group.
+ // We also want slightly more stable values, so use the group average
+ // instead of the group max
+ GLSLHF("shared uint group_sum = 0;\n");
+ GLSLF("atomicAdd(group_sum, uint(luma * %f));\n", MP_REF_WHITE);
+
+ // Have one thread in each work group update the frame maximum
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0))
+ GLSL(atomicMax(frame_max[index], group_sum /
+ (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
+
+ // Finally, have one thread per invocation update the total maximum
+ // and advance the index
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
+ GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
+ GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
+ GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
+ GLSL(index = next;)
+ GLSL(})
+
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSLF("const float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
+ MP_REF_WHITE * PEAK_DETECT_FRAMES);
+ } else {
+ GLSLHF("const float sig_peak = %f;\n", ref_peak);
+ }
+
// Desaturate the color using a coefficient dependent on the brightness
- if (desat > 0 && ref_peak > desat) {
+ if (desat > 0) {
GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat);
GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);)
}
@@ -542,23 +577,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSLF("luma = clamp(%f * luma, 0.0, 1.0);\n", isnan(param) ? 1.0 : param);
break;
- case TONE_MAPPING_MOBIUS: {
- float j = isnan(param) ? 0.3 : param;
- // solve for M(j) = j; M(ref_peak) = 1.0; M'(j) = 1.0
+ case TONE_MAPPING_MOBIUS:
+ GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
+ // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
// where M(x) = scale * (x+a)/(x+b)
- float a = -j*j * (ref_peak - 1) / (j*j - 2*j + ref_peak),
- b = (j*j - 2*j*ref_peak + ref_peak) / (ref_peak - 1);
-
- GLSLF("luma = mix(%f * (luma + %f) / (luma + %f), luma, luma <= %f);\n",
- (b*b + 2*b*j + j*j) / (b - a), a, b, j);
+ GLSLF("const float a = -j*j * (sig_peak - 1) / (j*j - 2*j + sig_peak);\n");
+ GLSLF("const float b = (j*j - 2*j*sig_peak + sig_peak) / "
+ "max(1e-6, sig_peak - 1);\n");
+ GLSLF("const float scale = (b*b + 2*b*j + j*j) / (b-a);\n");
+ GLSL(luma = mix(luma, scale * (luma + a) / (luma + b), luma > j);)
break;
- }
case TONE_MAPPING_REINHARD: {
float contrast = isnan(param) ? 0.5 : param,
offset = (1.0 - contrast) / contrast;
GLSLF("luma = luma / (luma + %f);\n", offset);
- GLSLF("luma *= %f;\n", (ref_peak + offset) / ref_peak);
+ GLSLF("const float lumascale = (sig_peak + %f) / sig_peak;\n", offset);
+ GLSL(luma *= lumascale;)
break;
}
@@ -568,20 +603,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n",
A, C*B, D*E, A, B, D*F, E/F);
GLSLHF("}\n");
-
- GLSLF("luma = hable(luma) / hable(%f);\n", ref_peak);
+ GLSL(luma = hable(luma) / hable(sig_peak);)
break;
}
case TONE_MAPPING_GAMMA: {
float gamma = isnan(param) ? 1.8 : param;
- GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma);
+ GLSLF("luma = pow(luma / sig_peak, %f);\n", 1.0/gamma);
break;
}
case TONE_MAPPING_LINEAR: {
float coeff = isnan(param) ? 1.0 : param;
- GLSLF("luma = %f * luma;\n", coeff / ref_peak);
+ GLSLF("luma = %f / sig_peak * luma;\n", coeff);
break;
}
@@ -596,11 +630,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// Map colors from one source space to another. These source spaces must be
// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
// auto-guessing. If is_linear is true, we assume the input has already been
-// linearized (e.g. for linear-scaling)
+// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will
+// detect the peak instead of relying on metadata. Note that this requires
+// the caller to have already bound the appropriate SSBO and set up the
+// compute shader metadata
void pass_color_map(struct gl_shader_cache *sc,
struct mp_colorspace src, struct mp_colorspace dst,
enum tone_mapping algo, float tone_mapping_param,
- float tone_mapping_desat, bool is_linear)
+ float tone_mapping_desat, bool detect_peak,
+ bool is_linear)
{
GLSLF("// color mapping\n");
@@ -643,8 +681,8 @@ void pass_color_map(struct gl_shader_cache *sc,
// Tone map to prevent clipping when the source signal peak exceeds the
// encodable range
if (src.sig_peak > dst_range) {
- pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param,
- tone_mapping_desat);
+ float ref_peak = detect_peak ? 0 : src.sig_peak / dst_range;
+ pass_tone_map(sc, ref_peak, algo, tone_mapping_param, tone_mapping_desat);
}
// Adapt to the right colorspace if necessary