vo_opengl: support HDR peak detection

This is done via compute shaders. As a consequence, the tone mapping algorithms had to be rewritten to compute their known constants in GLSL (ahead of time), instead of computing them once in C when the shader is generated. This didn't affect performance. Using shmem/SSBO atomics in this way is extremely fast on nvidia, but it might be slow on other platforms; this needs testing. Unfortunately, setting up the SSBO still requires OpenGL calls, which means I can't have it in video_shaders.c, where it belongs. But I'll defer worrying about that until the backend refactor, since then I'll be breaking up the video/video_shaders structure anyway.
author: Niklas Haas <git@haasn.xyz> 2017-07-17 21:39:06 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-07-24 17:19:31 +0200
commit: b196cadf9f9f6ea210db9236c2b26523a9a2719f (patch)
tree: 7faa5a77c65d84e45c074eb248fe0b54a62288ad /video/out/opengl/video_shaders.c
parent: aad6ba018a17eded2b3f4af2212e0123cfb29b79 (diff)
download: mpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.bz2
mpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.xz
1 files changed, 58 insertions, 20 deletions
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 3381d532b6..a7ecf1a448 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -521,7 +521,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
     GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
 }
 
-// Tone map from a known peak brightness to the range [0,1]
+// Tone map from a known peak brightness to the range [0,1]. If ref_peak
+// is 0, we will use peak detection instead
 static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
                           enum tone_mapping algo, float param, float desat)
 {
@@ -531,8 +532,42 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
     GLSL(float luma = dot(src_luma, color.rgb);)
     GLSL(float luma_orig = luma;)
 
+    if (!ref_peak) {
+        // For performance, we want to do as few atomic operations on global
+        // memory as possible, so use an atomic in shmem for the work group.
+        // We also want slightly more stable values, so use the group average
+        // instead of the group max
+        GLSLHF("shared uint group_sum = 0;\n");
+        GLSLF("atomicAdd(group_sum, uint(luma * %f));\n", MP_REF_WHITE);
+
+        // Have one thread in each work group update the frame maximum
+        GLSL(memoryBarrierBuffer();)
+        GLSL(barrier();)
+        GLSL(if (gl_LocalInvocationIndex == 0))
+            GLSL(atomicMax(frame_max[index], group_sum /
+                 (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
+
+        // Finally, have one thread per invocation update the total maximum
+        // and advance the index
+        GLSL(memoryBarrierBuffer();)
+        GLSL(barrier();)
+        GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
+            GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
+            GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
+            GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
+            GLSL(index = next;)
+        GLSL(})
+
+        GLSL(memoryBarrierBuffer();)
+        GLSL(barrier();)
+        GLSLF("const float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
+              MP_REF_WHITE * PEAK_DETECT_FRAMES);
+    } else {
+        GLSLHF("const float sig_peak = %f;\n", ref_peak);
+    }
+
     // Desaturate the color using a coefficient dependent on the brightness
-    if (desat > 0 && ref_peak > desat) {
+    if (desat > 0) {
         GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat);
         GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);)
     }
@@ -542,23 +577,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
         GLSLF("luma = clamp(%f * luma, 0.0, 1.0);\n", isnan(param) ? 1.0 : param);
         break;
 
-    case TONE_MAPPING_MOBIUS: {
-        float j = isnan(param) ? 0.3 : param;
-        // solve for M(j) = j; M(ref_peak) = 1.0; M'(j) = 1.0
+    case TONE_MAPPING_MOBIUS:
+        GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
+        // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
         // where M(x) = scale * (x+a)/(x+b)
-        float a = -j*j * (ref_peak - 1) / (j*j - 2*j + ref_peak),
-              b = (j*j - 2*j*ref_peak + ref_peak) / (ref_peak - 1);
-
-        GLSLF("luma = mix(%f * (luma + %f) / (luma + %f), luma, luma <= %f);\n",
-              (b*b + 2*b*j + j*j) / (b - a), a, b, j);
+        GLSLF("const float a = -j*j * (sig_peak - 1) / (j*j - 2*j + sig_peak);\n");
+        GLSLF("const float b = (j*j - 2*j*sig_peak + sig_peak) / "
+              "max(1e-6, sig_peak - 1);\n");
+        GLSLF("const float scale = (b*b + 2*b*j + j*j) / (b-a);\n");
+        GLSL(luma = mix(luma, scale * (luma + a) / (luma + b), luma > j);)
         break;
-    }
 
     case TONE_MAPPING_REINHARD: {
         float contrast = isnan(param) ? 0.5 : param,
               offset = (1.0 - contrast) / contrast;
         GLSLF("luma = luma / (luma + %f);\n", offset);
-        GLSLF("luma *= %f;\n", (ref_peak + offset) / ref_peak);
+        GLSLF("const float lumascale = (sig_peak + %f) / sig_peak;\n", offset);
+        GLSL(luma *= lumascale;)
         break;
     }
 
@@ -568,20 +603,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
         GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n",
                A, C*B, D*E, A, B, D*F, E/F);
         GLSLHF("}\n");
-
-        GLSLF("luma = hable(luma) / hable(%f);\n", ref_peak);
+        GLSL(luma = hable(luma) / hable(sig_peak);)
         break;
     }
 
     case TONE_MAPPING_GAMMA: {
         float gamma = isnan(param) ? 1.8 : param;
-        GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma);
+        GLSLF("luma = pow(luma / sig_peak, %f);\n", 1.0/gamma);
         break;
     }
 
     case TONE_MAPPING_LINEAR: {
         float coeff = isnan(param) ? 1.0 : param;
-        GLSLF("luma = %f * luma;\n", coeff / ref_peak);
+        GLSLF("luma = %f / sig_peak * luma;\n", coeff);
         break;
     }
 
@@ -596,11 +630,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
 // Map colors from one source space to another. These source spaces must be
 // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
 // auto-guessing. If is_linear is true, we assume the input has already been
-// linearized (e.g. for linear-scaling)
+// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will
+// detect the peak instead of relying on metadata. Note that this requires
+// the caller to have already bound the appropriate SSBO and set up the
+// compute shader metadata
 void pass_color_map(struct gl_shader_cache *sc,
                     struct mp_colorspace src, struct mp_colorspace dst,
                     enum tone_mapping algo, float tone_mapping_param,
-                    float tone_mapping_desat, bool is_linear)
+                    float tone_mapping_desat, bool detect_peak,
+                    bool is_linear)
 {
     GLSLF("// color mapping\n");
 
@@ -643,8 +681,8 @@ void pass_color_map(struct gl_shader_cache *sc,
     // Tone map to prevent clipping when the source signal peak exceeds the
     // encodable range
     if (src.sig_peak > dst_range) {
-        pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param,
-                      tone_mapping_desat);
+        float ref_peak = detect_peak ? 0 : src.sig_peak / dst_range;
+        pass_tone_map(sc, ref_peak, algo, tone_mapping_param, tone_mapping_desat);
     }
 
     // Adapt to the right colorspace if necessary
author	Niklas Haas <git@haasn.xyz>	2017-07-17 21:39:06 +0200
committer	Niklas Haas <git@haasn.xyz>	2017-07-24 17:19:31 +0200
commit	b196cadf9f9f6ea210db9236c2b26523a9a2719f (patch)
tree	7faa5a77c65d84e45c074eb248fe0b54a62288ad /video/out/opengl/video_shaders.c
parent	aad6ba018a17eded2b3f4af2212e0123cfb29b79 (diff)
download	mpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.bz2 mpv-b196cadf9f9f6ea210db9236c2b26523a9a2719f.tar.xz