From 05f0980b9610c3d0f75f8004578ae61d3e3145e4 Mon Sep 17 00:00:00 2001
From: Kotori Itsuka <cmdrkotori@gmail.com>
Date: Thu, 17 Jan 2019 20:20:37 +1000
Subject: vo_gpu: allow resetting target-peak to the trc default

Add "auto" to the possible values of target-peak.  The default value
for target_peak is to calculate the target using mp_trc_nom_peak.
Unfortunately, this default was outside the acceptable range of
10-10000 nits, which prevented its later reassignment.  So add an
"auto" choice to target-peak which lets clients and scripts go back
to using the trc default after assigning a value.
---
 video/out/gpu/video.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index b0fa9eb4d9..c12fb8536c 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -351,7 +351,8 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLAG("gamma-auto", gamma_auto, 0),
         OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
         OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
-        OPT_INTRANGE("target-peak", target_peak, 0, 10, 10000),
+        OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000,
+                          ({"auto", 0})),
         OPT_CHOICE("tone-mapping", tone_mapping, 0,
                    ({"clip",     TONE_MAPPING_CLIP},
                     {"mobius",   TONE_MAPPING_MOBIUS},
-- 
cgit v1.2.3


From 3fe882d4ae80fa060a71dad0d6d1605afcfe98b6 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 27 Dec 2018 18:34:19 +0100
Subject: vo_gpu: improve tone mapping desaturation

Instead of desaturating towards luma, we desaturate towards the
per-channel tone mapped version. This essentially provides a smooth
roll-off towards the "hollywood"-style (non-chromatic) tone mapping
algorithm, which works better for bright content, while continuing to
use the "linear" style (chromatic) tone mapping algorithm for primarily
in-gamut content.

We also split up the desaturation algorithm into strength and exponent,
which allows users to use less aggressive desaturation settings without
affecting the overall curve.
---
 video/out/gpu/video.c         |  41 ++++++++---------
 video/out/gpu/video.h         |  15 ++++---
 video/out/gpu/video_shaders.c | 101 ++++++++++++++++++++++--------------------
 video/out/gpu/video_shaders.h |   6 +--
 4 files changed, 87 insertions(+), 76 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index c12fb8536c..9ffdc62d20 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -313,9 +313,12 @@ static const struct gl_video_opts gl_video_opts_def = {
     .alpha_mode = ALPHA_BLEND_TILES,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
-    .tone_mapping = TONE_MAPPING_HABLE,
-    .tone_mapping_param = NAN,
-    .tone_mapping_desat = 0.5,
+    .tone_map = {
+        .curve = TONE_MAPPING_HABLE,
+        .curve_param = NAN,
+        .desat = 0.75,
+        .desat_exp = 1.5,
+    },
     .early_flush = -1,
     .hwdec_interop = "auto",
 };
@@ -353,20 +356,22 @@ const struct m_sub_options gl_video_conf = {
         OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
         OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000,
                           ({"auto", 0})),
-        OPT_CHOICE("tone-mapping", tone_mapping, 0,
+        OPT_CHOICE("tone-mapping", tone_map.curve, 0,
                    ({"clip",     TONE_MAPPING_CLIP},
                     {"mobius",   TONE_MAPPING_MOBIUS},
                     {"reinhard", TONE_MAPPING_REINHARD},
                     {"hable",    TONE_MAPPING_HABLE},
                     {"gamma",    TONE_MAPPING_GAMMA},
                     {"linear",   TONE_MAPPING_LINEAR})),
-        OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0,
+        OPT_CHOICE("hdr-compute-peak", tone_map.compute_peak, 0,
                    ({"auto", 0},
                     {"yes", 1},
                     {"no", -1})),
-        OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
-        OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
-        OPT_FLAG("gamut-warning", gamut_warning, 0),
+        OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
+        OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
+        OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
+                       tone_map.desat_exp, 0, 0.0, 20.0),
+        OPT_FLAG("gamut-warning", tone_map.gamut_warning, 0),
         OPT_FLAG("opengl-pbo", pbo, 0),
         SCALER_OPTS("scale",  SCALER_SCALE),
         SCALER_OPTS("dscale", SCALER_DSCALE),
@@ -2472,7 +2477,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
     if (!dst.sig_peak)
         dst.sig_peak = mp_trc_nom_peak(dst.gamma);
 
-    bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma);
+    struct gl_tone_map_opts tone_map = p->opts.tone_map;
+    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma);
     if (detect_peak && !p->hdr_peak_ssbo) {
         struct {
             uint32_t counter;
@@ -2493,8 +2499,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         p->hdr_peak_ssbo = ra_buf_create(ra, &params);
         if (!p->hdr_peak_ssbo) {
             MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
+            tone_map.compute_peak = p->opts.tone_map.compute_peak = -1;
             detect_peak = false;
-            p->opts.compute_hdr_peak = -1;
         }
     }
 
@@ -2515,9 +2521,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
     }
 
     // Adapt from src to dst as necessary
-    pass_color_map(p->sc, src, dst, p->opts.tone_mapping,
-                   p->opts.tone_mapping_param, p->opts.tone_mapping_desat,
-                   detect_peak, p->opts.gamut_warning, p->use_linear && !osd);
+    pass_color_map(p->sc, p->use_linear && !osd, src, dst, &tone_map);
 
     if (p->use_lut_3d) {
         gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture);
@@ -3583,12 +3587,12 @@ static void check_gl_features(struct gl_video *p)
     }
 
     bool have_compute_peak = have_compute && have_ssbo;
-    if (!have_compute_peak && p->opts.compute_hdr_peak >= 0) {
-        int msgl = p->opts.compute_hdr_peak == 1 ? MSGL_WARN : MSGL_V;
+    if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) {
+        int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V;
         MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the "
                         "following is not supported: compute shaders=%d, "
                         "SSBO=%d).\n", have_compute, have_ssbo);
-        p->opts.compute_hdr_peak = -1;
+        p->opts.tone_map.compute_peak = -1;
     }
 
     p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
@@ -3610,7 +3614,6 @@ static void check_gl_features(struct gl_video *p)
             .alpha_mode = p->opts.alpha_mode,
             .use_rectangle = p->opts.use_rectangle,
             .background = p->opts.background,
-            .compute_hdr_peak = p->opts.compute_hdr_peak,
             .dither_algo = p->opts.dither_algo,
             .dither_depth = p->opts.dither_depth,
             .dither_size = p->opts.dither_size,
@@ -3618,9 +3621,7 @@ static void check_gl_features(struct gl_video *p)
             .temporal_dither_period = p->opts.temporal_dither_period,
             .tex_pad_x = p->opts.tex_pad_x,
             .tex_pad_y = p->opts.tex_pad_y,
-            .tone_mapping = p->opts.tone_mapping,
-            .tone_mapping_param = p->opts.tone_mapping_param,
-            .tone_mapping_desat = p->opts.tone_mapping_desat,
+            .tone_map = p->opts.tone_map,
             .early_flush = p->opts.early_flush,
             .icc_opts = p->opts.icc_opts,
             .hwdec_interop = p->opts.hwdec_interop,
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index ca8b6f65d4..ee5c0a2861 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -98,6 +98,15 @@ enum tone_mapping {
 // How many frames to average over for HDR peak detection
 #define PEAK_DETECT_FRAMES 63
 
+struct gl_tone_map_opts {
+    int curve;
+    float curve_param;
+    int compute_peak;
+    float desat;
+    float desat_exp;
+    int gamut_warning; // bool
+};
+
 struct gl_video_opts {
     int dumb_mode;
     struct scaler_config scaler[4];
@@ -107,11 +116,7 @@ struct gl_video_opts {
     int target_prim;
     int target_trc;
     int target_peak;
-    int tone_mapping;
-    int compute_hdr_peak;
-    float tone_mapping_param;
-    float tone_mapping_desat;
-    int gamut_warning;
+    struct gl_tone_map_opts tone_map;
     int correct_downscaling;
     int linear_downscaling;
     int linear_upscaling;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 342fb39ded..315e15cc89 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -580,7 +580,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc)
 
     // Have each thread update the work group sum with the local value
     GLSL(barrier();)
-    GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE);
+    GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE);
 
     // Have one thread per work group update the global atomics. We use the
     // work group average even for the global sum, to make the values slightly
@@ -642,48 +642,42 @@ static void hdr_update_peak(struct gl_shader_cache *sc)
 
 // Tone map from a known peak brightness to the range [0,1]. If ref_peak
 // is 0, we will use peak detection instead
-static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
+static void pass_tone_map(struct gl_shader_cache *sc,
                           float src_peak, float dst_peak,
-                          enum tone_mapping algo, float param, float desat)
+                          const struct gl_tone_map_opts *opts)
 {
     GLSLF("// HDR tone mapping\n");
 
     // To prevent discoloration due to out-of-bounds clipping, we need to make
     // sure to reduce the value range as far as necessary to keep the entire
     // signal in range, so tone map based on the brightest component.
-    GLSL(float sig = max(max(color.r, color.g), color.b);)
+    GLSL(int sig_idx = 0;)
+    GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;)
+    GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;)
+    GLSL(float sig_max = color[sig_idx];)
     GLSLF("float sig_peak = %f;\n", src_peak);
     GLSLF("float sig_avg = %f;\n", sdr_avg);
 
-    if (detect_peak)
+    if (opts->compute_peak >= 0)
         hdr_update_peak(sc);
 
+    GLSLF("vec3 sig = color.rgb;\n");
+
     // Rescale the variables in order to bring it into a representation where
     // 1.0 represents the dst_peak. This is because all of the tone mapping
     // algorithms are defined in such a way that they map to the range [0.0, 1.0].
     if (dst_peak > 1.0) {
-        GLSLF("sig *= %f;\n", 1.0 / dst_peak);
-        GLSLF("sig_peak *= %f;\n", 1.0 / dst_peak);
+        GLSLF("sig *= 1.0/%f;\n", dst_peak);
+        GLSLF("sig_peak *= 1.0/%f;\n", dst_peak);
     }
 
-    GLSL(float sig_orig = sig;)
+    GLSL(float sig_orig = sig[sig_idx];)
     GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg);
     GLSL(sig *= slope;)
     GLSL(sig_peak *= slope;)
 
-    // Desaturate the color using a coefficient dependent on the signal.
-    // Do this after peak detection in order to prevent over-desaturating
-    // overly bright souces
-    if (desat > 0) {
-        float base = 0.18 * dst_peak;
-        GLSL(float luma = dot(dst_luma, color.rgb);)
-        GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base);
-        GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat);
-        GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);)
-        GLSL(sig = mix(sig, luma * slope, coeff);) // also make sure to update `sig`
-    }
-
-    switch (algo) {
+    float param = opts->curve_param;
+    switch (opts->curve) {
     case TONE_MAPPING_CLIP:
         GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
         break;
@@ -697,14 +691,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
         GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / "
               "max(1e-6, sig_peak - 1.0);\n");
         GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n");
-        GLSL(sig = sig > j ? scale * (sig + a) / (sig + b) : sig;)
+        GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)),"
+              "          greaterThan(sig, vec3(j)));\n");
         GLSLF("}\n");
         break;
 
     case TONE_MAPPING_REINHARD: {
         float contrast = isnan(param) ? 0.5 : param,
               offset = (1.0 - contrast) / contrast;
-        GLSLF("sig = sig / (sig + %f);\n", offset);
+        GLSLF("sig = sig / (sig + vec3(%f));\n", offset);
         GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset);
         GLSL(sig *= scale;)
         break;
@@ -712,19 +707,25 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
 
     case TONE_MAPPING_HABLE: {
         float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
-        GLSLHF("float hable(float x) {\n");
-        GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n",
-               A, C*B, D*E, A, B, D*F, E/F);
+        GLSLHF("vec3 hable(vec3 x) {\n");
+        GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / "
+               "       (x * (%f*x + vec3(%f)) + vec3(%f)) "
+               "       - vec3(%f);\n",
+               A, C*B, D*E,
+               A, B, D*F,
+               E/F);
         GLSLHF("}\n");
-        GLSL(sig = hable(sig) / hable(sig_peak);)
+        GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n");
         break;
     }
 
     case TONE_MAPPING_GAMMA: {
         float gamma = isnan(param) ? 1.8 : param;
-        GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma);
-        GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;)
-        GLSL(sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig;)
+        GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma);
+        GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;)
+        GLSLF("sig = mix(scale * sig,"
+              "          pow(sig / sig_peak, vec3(gamma)),"
+              "          greaterThan(sig, vec3(cutoff)));\n");
         break;
     }
 
@@ -738,24 +739,32 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
         abort();
     }
 
-    // Apply the computed scale factor to the color, linearly to prevent
-    // discoloration
-    GLSL(sig = min(sig, 1.0);)
-    GLSL(color.rgb *= vec3(sig / sig_orig);)
+    GLSL(sig = min(sig, vec3(1.0));)
+    GLSL(vec3 sig_lin = color.rgb * (sig[sig_idx] / sig_orig);)
+
+    // Mix between the per-channel tone mapped and the linear tone mapped
+    // signal based on the desaturation strength
+    if (opts->desat > 0) {
+        float base = 0.18 * dst_peak;
+        GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / "
+              "              max(sig[sig_idx], 1.0);\n", base);
+        GLSLF("coeff = %f * pow(coeff, %f);\n", opts->desat, opts->desat_exp);
+        GLSLF("color.rgb = mix(sig_lin, %f * sig, coeff);\n", dst_peak);
+    } else {
+        GLSL(color.rgb = sig_lin;)
+    }
 }
 
 // Map colors from one source space to another. These source spaces must be
 // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
 // auto-guessing. If is_linear is true, we assume the input has already been
-// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will
-// detect the peak instead of relying on metadata. Note that this requires
-// the caller to have already bound the appropriate SSBO and set up the
-// compute shader metadata
-void pass_color_map(struct gl_shader_cache *sc,
+// linearized (e.g. for linear-scaling). If `opts->compute_peak` is true, we
+// will detect the peak instead of relying on metadata. Note that this requires
+// the caller to have already bound the appropriate SSBO and set up the compute
+// shader metadata
+void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
                     struct mp_colorspace src, struct mp_colorspace dst,
-                    enum tone_mapping algo, float tone_mapping_param,
-                    float tone_mapping_desat, bool detect_peak,
-                    bool gamut_warning, bool is_linear)
+                    const struct gl_tone_map_opts *opts)
 {
     GLSLF("// color mapping\n");
 
@@ -803,10 +812,8 @@ void pass_color_map(struct gl_shader_cache *sc,
 
     // Tone map to prevent clipping when the source signal peak exceeds the
     // encodable range or we've reduced the gamut
-    if (src.sig_peak > dst.sig_peak) {
-        pass_tone_map(sc, detect_peak, src.sig_peak, dst.sig_peak, algo,
-                      tone_mapping_param, tone_mapping_desat);
-    }
+    if (src.sig_peak > dst.sig_peak)
+        pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts);
 
     if (need_ootf)
         pass_inverse_ootf(sc, dst.light, dst.sig_peak);
@@ -821,7 +828,7 @@ void pass_color_map(struct gl_shader_cache *sc,
     GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
 
     // Warn for remaining out-of-gamut colors is enabled
-    if (gamut_warning) {
+    if (opts->gamut_warning) {
         GLSL(if (any(greaterThan(color.rgb, vec3(1.01)))))
             GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert
     }
diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h
index cd395d6377..f20d643e99 100644
--- a/video/out/gpu/video_shaders.h
+++ b/video/out/gpu/video_shaders.h
@@ -40,11 +40,9 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
 void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
 void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
 
-void pass_color_map(struct gl_shader_cache *sc,
+void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
                     struct mp_colorspace src, struct mp_colorspace dst,
-                    enum tone_mapping algo, float tone_mapping_param,
-                    float tone_mapping_desat, bool use_detected_peak,
-                    bool gamut_warning, bool is_linear);
+                    const struct gl_tone_map_opts *opts);
 
 void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
                         AVLFG *lfg, enum mp_csp_trc trc);
-- 
cgit v1.2.3


From 6179dcbb798aa9e3501af82ae46975e881d80626 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Tue, 1 Jan 2019 07:30:00 +0100
Subject: vo_gpu: redesign peak detection algorithm

The previous approach of using an FIR with tunable hard threshold for
scene changes had several problems:

- the FIR involved annoying hard-coded buffer sizes, high VRAM usage,
  and the FIR sum was prone to numerical overflow which limited the
  number of frames we could average over. We also totally redesign the
  scene change detection.

- the hard scene change detection was prone to both false positives and
  false negatives, each with their own (annoying) issues.

Scrap this entirely and switch to a dual approach of using a simple
single-pole IIR low pass filter to smooth out noise, while using a
softer scene change curve (with tunable low and high thresholds), based
on `smoothstep`. The IIR filter is extremely simple in its
implementation and has an arbitrarily user-tunable cutoff frequency,
while the smoothstep-based scene change curve provides a good, tunable
tradeoff between adaptation speed and stability - without exhibiting
either of the traditional issues associated with the hard cutoff.

Another way to think about the new options is that the "low threshold"
provides a margin of error within which we don't care about small
fluctuations in the scene (which will therefore be smoothed out by the
IIR filter).
---
 video/out/gpu/video.c         | 36 ++++++++--------
 video/out/gpu/video.h         |  6 +--
 video/out/gpu/video_shaders.c | 96 +++++++++++++++++--------------------------
 3 files changed, 61 insertions(+), 77 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 9ffdc62d20..a29f09bc3d 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = {
     .tone_map = {
         .curve = TONE_MAPPING_HABLE,
         .curve_param = NAN,
+        .decay_rate = 100.0,
+        .scene_threshold_low = 50,
+        .scene_threshold_high = 200,
         .desat = 0.75,
         .desat_exp = 1.5,
     },
@@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = {
                    ({"auto", 0},
                     {"yes", 1},
                     {"no", -1})),
+        OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
+        OPT_INTRANGE("hdr-scene-threshold-low",
+                     tone_map.scene_threshold_low, 0, 0, 10000),
+        OPT_INTRANGE("hdr-scene-threshold-high",
+                     tone_map.scene_threshold_high, 0, 0, 10000),
         OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
         OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
         OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
@@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         dst.sig_peak = mp_trc_nom_peak(dst.gamma);
 
     struct gl_tone_map_opts tone_map = p->opts.tone_map;
-    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma);
+    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
+                       && src.sig_peak > dst.sig_peak;
+
     if (detect_peak && !p->hdr_peak_ssbo) {
         struct {
+            float average[2];
+            uint32_t frame_sum;
+            uint32_t frame_max;
             uint32_t counter;
-            uint32_t frame_idx;
-            uint32_t frame_num;
-            uint32_t frame_max[PEAK_DETECT_FRAMES+1];
-            uint32_t frame_sum[PEAK_DETECT_FRAMES+1];
-            uint32_t total_max;
-            uint32_t total_sum;
-        } peak_ssbo = {0};
+        } peak_ssbo = {
+            .average = { 0.25, src.sig_peak },
+        };
 
         struct ra_buf_params params = {
             .type = RA_BUF_TYPE_SHADER_STORAGE,
@@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         pass_describe(p, "detect HDR peak");
         pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
         gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
+            "vec2 average;"
+            "uint frame_sum;"
+            "uint frame_max;"
             "uint counter;"
-            "uint frame_idx;"
-            "uint frame_num;"
-            "uint frame_max[%d];"
-            "uint frame_avg[%d];"
-            "uint total_max;"
-            "uint total_avg;",
-            PEAK_DETECT_FRAMES + 1,
-            PEAK_DETECT_FRAMES + 1
         );
     }
 
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index ee5c0a2861..077f69332f 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -95,13 +95,13 @@ enum tone_mapping {
     TONE_MAPPING_LINEAR,
 };
 
-// How many frames to average over for HDR peak detection
-#define PEAK_DETECT_FRAMES 63
-
 struct gl_tone_map_opts {
     int curve;
     float curve_param;
     int compute_peak;
+    float decay_rate;
+    int scene_threshold_low;
+    int scene_threshold_high;
     float desat;
     float desat_exp;
     int gamut_warning; // bool
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 315e15cc89..0fff8f05f2 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh
 // under a typical presentation gamma of about 2.0.
 static const float sdr_avg = 0.25;
 
-// The threshold for which to consider an average luminance difference to be
-// a sign of a scene change.
-static const int scene_threshold = 0.2 * MP_REF_WHITE;
-
-static void hdr_update_peak(struct gl_shader_cache *sc)
+static void hdr_update_peak(struct gl_shader_cache *sc,
+                            const struct gl_tone_map_opts *opts)
 {
-    // For performance, we want to do as few atomic operations on global
-    // memory as possible, so use an atomic in shmem for the work group.
-    GLSLH(shared uint wg_sum;);
-    GLSL(wg_sum = 0;)
-
-    // Have each thread update the work group sum with the local value
+    // Update the sig_peak/sig_avg from the old SSBO state
+    GLSL(sig_avg  = max(1e-3, average.x);)
+    GLSL(sig_peak = max(1.00, average.y);)
+
+    // For performance, and to avoid overflows, we tally up the sub-results per
+    // pixel using shared memory first
+    GLSLH(shared uint wg_sum;)
+    GLSLH(shared uint wg_max;)
+    GLSL(wg_sum = wg_max = 0;)
     GLSL(barrier();)
-    GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE);
+    GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
+    GLSL(atomicAdd(wg_sum, sig_uint);)
+    GLSL(atomicMax(wg_max, sig_uint);)
 
-    // Have one thread per work group update the global atomics. We use the
-    // work group average even for the global sum, to make the values slightly
-    // more stable and smooth out tiny super-highlights.
+    // Have one thread per work group update the global atomics
     GLSL(memoryBarrierShared();)
     GLSL(barrier();)
     GLSL(if (gl_LocalInvocationIndex == 0) {)
     GLSL(    uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
-    GLSL(    atomicMax(frame_max[frame_idx], wg_avg);)
-    GLSL(    atomicAdd(frame_avg[frame_idx], wg_avg);)
+    GLSL(    atomicAdd(frame_sum, wg_avg);)
+    GLSL(    atomicMax(frame_max, wg_max);)
+    GLSL(    memoryBarrierBuffer();)
     GLSL(})
-
-    const float refi = 1.0 / MP_REF_WHITE;
-
-    // Update the sig_peak/sig_avg from the old SSBO state
-    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
-    GLSL(if (frame_num > 0) {)
-    GLSLF("    float peak = %f * float(total_max) / float(frame_num);\n", refi);
-    GLSLF("    float avg = %f * float(total_avg) / float(frame_num);\n", refi);
-    GLSLF("    sig_peak = max(1.0, peak);\n");
-    GLSLF("    sig_avg  = max(%f, avg);\n", sdr_avg);
-    GLSL(});
+    GLSL(barrier();)
 
     // Finally, to update the global state, we increment a counter per dispatch
-    GLSL(memoryBarrierBuffer();)
-    GLSL(barrier();)
+    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
     GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
-
-    // Since we sum up all the workgroups, we also still need to divide the
-    // average by the number of work groups
     GLSL(    counter = 0;)
-    GLSL(    frame_avg[frame_idx] /= num_wg;)
-    GLSL(    uint cur_max = frame_max[frame_idx];)
-    GLSL(    uint cur_avg = frame_avg[frame_idx];)
-
-    // Scene change detection
-    GLSL(    int diff = int(frame_num * cur_avg) - int(total_avg);)
-    GLSLF("  if (abs(diff) > frame_num * %d) {\n", scene_threshold);
-    GLSL(        frame_num = 0;)
-    GLSL(        total_max = total_avg = 0;)
-    GLSLF("      for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1);
-    GLSL(            frame_max[i] = frame_avg[i] = 0;)
-    GLSL(        frame_max[frame_idx] = cur_max;)
-    GLSL(        frame_avg[frame_idx] = cur_avg;)
-    GLSL(    })
-
-    // Add the current frame, then subtract and reset the next frame
-    GLSLF("  uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
-    GLSL(    total_max += cur_max - frame_max[next];)
-    GLSL(    total_avg += cur_avg - frame_avg[next];)
-    GLSL(    frame_max[next] = frame_avg[next] = 0;)
-
-    // Update the index and count
-    GLSL(    frame_idx = next;)
-    GLSLF("  frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
+    GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
+    GLSLF("  cur *= 1.0/%f;\n", MP_REF_WHITE);
+
+    // Use an IIR low-pass filter to smooth out the detected values, with a
+    // configurable decay rate based on the desired time constant (tau)
+    float a = 1.0 - cos(1.0 / opts->decay_rate);
+    float decay = sqrt(a*a + 2*a) - a;
+    GLSLF("  average += %f * (cur - average);\n", decay);
+
+    // Scene change hysteresis
+    GLSLF("  float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n",
+          (float) opts->scene_threshold_low / MP_REF_WHITE,
+          (float) opts->scene_threshold_high / MP_REF_WHITE);
+    GLSL(    average = mix(average, cur, weight);)
+
+    // Reset SSBO state for the next frame
+    GLSL(    frame_max = frame_sum = 0;)
     GLSL(    memoryBarrierBuffer();)
     GLSL(})
 }
@@ -659,7 +639,7 @@ static void pass_tone_map(struct gl_shader_cache *sc,
     GLSLF("float sig_avg = %f;\n", sdr_avg);
 
     if (opts->compute_peak >= 0)
-        hdr_update_peak(sc);
+        hdr_update_peak(sc, opts);
 
     GLSLF("vec3 sig = color.rgb;\n");
 
-- 
cgit v1.2.3


From 12e58ff8a65c537a222a3fb954f88d98a3a5bfd2 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Wed, 2 Jan 2019 03:03:38 +0100
Subject: vo_gpu: allow boosting dark scenes when tone mapping

In theory our "eye adaptation" algorithm works in both ways, both
darkening bright scenes and brightening dark scenes. But I've always
just prevented the latter with a hard clamp, since I wanted to avoid
blowing up dark scenes into looking funny (and full of noise).

But allowing a tiny bit of over-exposure might be a good thing. I won't
change the default just yet (better let users test), but a moderate
value of 1.2 might be better than the current 1.0 limit. Needs testing
especially on dark scenes.
---
 video/out/gpu/video.c         | 2 ++
 video/out/gpu/video.h         | 1 +
 video/out/gpu/video_shaders.c | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index a29f09bc3d..6bf0bb31a1 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -316,6 +316,7 @@ static const struct gl_video_opts gl_video_opts_def = {
     .tone_map = {
         .curve = TONE_MAPPING_HABLE,
         .curve_param = NAN,
+        .max_boost = 1.0,
         .decay_rate = 100.0,
         .scene_threshold_low = 50,
         .scene_threshold_high = 200,
@@ -376,6 +377,7 @@ const struct m_sub_options gl_video_conf = {
         OPT_INTRANGE("hdr-scene-threshold-high",
                      tone_map.scene_threshold_high, 0, 0, 10000),
         OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
+        OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0),
         OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
         OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
                        tone_map.desat_exp, 0, 0.0, 20.0),
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index 077f69332f..0bd5c57e8f 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -98,6 +98,7 @@ enum tone_mapping {
 struct gl_tone_map_opts {
     int curve;
     float curve_param;
+    float max_boost;
     int compute_peak;
     float decay_rate;
     int scene_threshold_low;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 0fff8f05f2..fbccd56eb3 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -652,7 +652,7 @@ static void pass_tone_map(struct gl_shader_cache *sc,
     }
 
     GLSL(float sig_orig = sig[sig_idx];)
-    GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg);
+    GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg);
     GLSL(sig *= slope;)
     GLSL(sig_peak *= slope;)
 
-- 
cgit v1.2.3


From fdd671188d7edb8d150ec2c93656fb80bf031f12 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Wed, 2 Jan 2019 07:18:29 +0100
Subject: vo_gpu: improve accuracy of HDR brightness estimation

This change switches to a logarithmic mean to estimate the average
signal brightness. This handles dark scenes with isolated highlights
much more faithfully than the linear mean did, since the log of the
signal roughly corresponds to the perceptual brightness.
---
 video/out/gpu/video.c         |  4 ++--
 video/out/gpu/video_shaders.c | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 6bf0bb31a1..be49551dfb 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2494,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
     if (detect_peak && !p->hdr_peak_ssbo) {
         struct {
             float average[2];
-            uint32_t frame_sum;
+            int32_t frame_sum;
             uint32_t frame_max;
             uint32_t counter;
         } peak_ssbo = {
@@ -2520,7 +2520,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
         gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
             "vec2 average;"
-            "uint frame_sum;"
+            "int frame_sum;"
             "uint frame_max;"
             "uint counter;"
         );
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index fbccd56eb3..127db58ea2 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -574,21 +574,24 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
     GLSL(sig_avg  = max(1e-3, average.x);)
     GLSL(sig_peak = max(1.00, average.y);)
 
+    // Chosen to avoid overflowing on an 8K buffer
+    const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
+
     // For performance, and to avoid overflows, we tally up the sub-results per
     // pixel using shared memory first
-    GLSLH(shared uint wg_sum;)
+    GLSLH(shared int wg_sum;)
     GLSLH(shared uint wg_max;)
-    GLSL(wg_sum = wg_max = 0;)
+    GLSL(wg_sum = 0; wg_max = 0;)
     GLSL(barrier();)
-    GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
-    GLSL(atomicAdd(wg_sum, sig_uint);)
-    GLSL(atomicMax(wg_max, sig_uint);)
+    GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min);
+    GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale);
+    GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale);
 
     // Have one thread per work group update the global atomics
     GLSL(memoryBarrierShared();)
     GLSL(barrier();)
     GLSL(if (gl_LocalInvocationIndex == 0) {)
-    GLSL(    uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+    GLSL(    int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
     GLSL(    atomicAdd(frame_sum, wg_avg);)
     GLSL(    atomicMax(frame_max, wg_max);)
     GLSL(    memoryBarrierBuffer();)
@@ -600,7 +603,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
     GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
     GLSL(    counter = 0;)
     GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
-    GLSLF("  cur *= 1.0/%f;\n", MP_REF_WHITE);
+    GLSLF("  cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
+    GLSL(    cur.x = exp(cur.x);)
 
     // Use an IIR low-pass filter to smooth out the detected values, with a
     // configurable decay rate based on the desired time constant (tau)
@@ -615,7 +619,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
     GLSL(    average = mix(average, cur, weight);)
 
     // Reset SSBO state for the next frame
-    GLSL(    frame_max = frame_sum = 0;)
+    GLSL(    frame_sum = 0; frame_max = 0;)
     GLSL(    memoryBarrierBuffer();)
     GLSL(})
 }
-- 
cgit v1.2.3


From 4e8022da269d02c3bb23e4e119e4b1dc9aa3f3e4 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 3 Jan 2019 00:44:15 +0100
Subject: vo_gpu: allow color management in dumb mode

There's no point in disallowing target-trc/prim in dumb mode, since they
still work fine.
---
 video/out/gpu/video.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index be49551dfb..5e805019ea 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -3513,9 +3513,9 @@ static bool check_dumb_mode(struct gl_video *p)
         return false;
 
     // otherwise, use auto-detection
-    if (o->target_prim || o->target_trc || o->correct_downscaling ||
-        o->linear_downscaling || o->linear_upscaling || o->sigmoid_upscaling ||
-        o->interpolation || o->blend_subs || o->deband || o->unsharp)
+    if (o->correct_downscaling || o->linear_downscaling ||
+        o->linear_upscaling || o->sigmoid_upscaling || o->interpolation ||
+        o->blend_subs || o->deband || o->unsharp)
         return false;
     // check remaining scalers (tscale is already implicitly excluded above)
     for (int i = 0; i < SCALER_COUNT; i++) {
@@ -3527,8 +3527,6 @@ static bool check_dumb_mode(struct gl_video *p)
     }
     if (o->user_shaders && o->user_shaders[0])
         return false;
-    if (p->use_lut_3d)
-        return false;
     return true;
 }
 
@@ -3631,6 +3629,9 @@ static void check_gl_features(struct gl_video *p)
             .early_flush = p->opts.early_flush,
             .icc_opts = p->opts.icc_opts,
             .hwdec_interop = p->opts.hwdec_interop,
+            .target_trc = p->opts.target_trc,
+            .target_prim = p->opts.target_prim,
+            .target_peak = p->opts.target_peak,
         };
         for (int n = 0; n < SCALER_COUNT; n++)
             p->opts.scaler[n] = gl_video_opts_def.scaler[n];
-- 
cgit v1.2.3


From 11b58415d51e14760ffb0302d9c6d86a504a2b57 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 3 Jan 2019 01:29:08 +0100
Subject: vo_gpu: improve numerical accuracy of PQ OETF constant

Not a huge deal, but we can do the division in C, which makes the float
constant larger, so more significant digits survive the %f formatting.
---
 video/out/gpu/video_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 127db58ea2..3b6c9d01af 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -380,7 +380,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n"
               "             / (vec3(%f) - vec3(%f) * color.rgb);\n",
               PQ_C1, PQ_C2, PQ_C3);
-        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1);
+        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1);
         // PQ's output range is 0-10000, but we need it to be relative to to
         // MP_REF_WHITE instead, so rescale
         GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
-- 
cgit v1.2.3


From 677ae4f8fe5c9896bc7b7b174e75400c15afc146 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 3 Jan 2019 01:39:23 +0100
Subject: vo_gpu: make --gamut-warning warn on negative colors

As is the case for actually out-of-gamut colors (rather than just too
bright colors).
---
 video/out/gpu/video_shaders.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 3b6c9d01af..806f0e17dd 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -813,7 +813,8 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
 
     // Warn for remaining out-of-gamut colors is enabled
     if (opts->gamut_warning) {
-        GLSL(if (any(greaterThan(color.rgb, vec3(1.01)))))
+        GLSL(if (any(greaterThan(color.rgb, vec3(1.01))) ||
+                 any(lessThan(color.rgb, vec3(0.0)))))
             GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert
     }
 
-- 
cgit v1.2.3


From 258ed5d471334ef756563a5384540c063697f3b3 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 3 Jan 2019 01:40:08 +0100
Subject: vo_gpu: tone map before gamut mapping

Gamut mapping can take very bright out-of-gamut colors into the
negatives, which completely destroys the color balance (which tone
mapping tries its best to preserve).
---
 video/out/gpu/video_shaders.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 806f0e17dd..07ac0b940f 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -784,6 +784,10 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
     if (need_ootf)
         pass_ootf(sc, src.light, src.sig_peak);
 
+    // Tone map to prevent clipping due to excessive brightness
+    if (src.sig_peak > dst.sig_peak)
+        pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts);
+
     // Adapt to the right colorspace if necessary
     if (src.primaries != dst.primaries) {
         struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries),
@@ -794,11 +798,6 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
         GLSL(color.rgb = cms_matrix * color.rgb;)
     }
 
-    // Tone map to prevent clipping when the source signal peak exceeds the
-    // encodable range or we've reduced the gamut
-    if (src.sig_peak > dst.sig_peak)
-        pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts);
-
     if (need_ootf)
         pass_inverse_ootf(sc, dst.light, dst.sig_peak);
 
-- 
cgit v1.2.3


From b4b719e33748970a9bf98a82a017d8f149ecb557 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Thu, 3 Jan 2019 17:18:58 +0100
Subject: vo_gpu: clamp sigmoid function

Can explode on some clips otherwise
---
 video/out/gpu/video.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 5e805019ea..df357b3552 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2381,6 +2381,7 @@ static void pass_scale_main(struct gl_video *p)
         // values at 1 and 0, and then scale/shift them, respectively.
         sig_offset = 1.0/(1+expf(sig_slope * sig_center));
         sig_scale  = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
+        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
         GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
                 sig_center, sig_scale, sig_offset, sig_slope);
         pass_opt_hook_point(p, "SIGMOID", NULL);
@@ -2408,6 +2409,7 @@ static void pass_scale_main(struct gl_video *p)
     GLSLF("// scaler post-conversion\n");
     if (use_sigmoid) {
         // Inverse of the transformation above
+        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
         GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
                 sig_slope, sig_center, sig_offset, sig_scale);
     }
-- 
cgit v1.2.3


From 3f1bc25d4de6150b0acff7e92d3e3084a7d989f0 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Fri, 4 Jan 2019 16:46:38 +0100
Subject: vo_gpu: use dB units for scene change detection

Rather than the linear cd/m^2 units, these (relative) logarithmic units
lend themselves much better to actually detecting scene changes,
especially since the scene averaging was changed to also work
logarithmically.
---
 video/out/gpu/video.c         | 12 ++++++------
 video/out/gpu/video.h         |  4 ++--
 video/out/gpu/video_shaders.c |  7 ++++---
 3 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index df357b3552..24e6990139 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -318,8 +318,8 @@ static const struct gl_video_opts gl_video_opts_def = {
         .curve_param = NAN,
         .max_boost = 1.0,
         .decay_rate = 100.0,
-        .scene_threshold_low = 50,
-        .scene_threshold_high = 200,
+        .scene_threshold_low = 5.5,
+        .scene_threshold_high = 10.0,
         .desat = 0.75,
         .desat_exp = 1.5,
     },
@@ -372,10 +372,10 @@ const struct m_sub_options gl_video_conf = {
                     {"yes", 1},
                     {"no", -1})),
         OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
-        OPT_INTRANGE("hdr-scene-threshold-low",
-                     tone_map.scene_threshold_low, 0, 0, 10000),
-        OPT_INTRANGE("hdr-scene-threshold-high",
-                     tone_map.scene_threshold_high, 0, 0, 10000),
+        OPT_FLOATRANGE("hdr-scene-threshold-low",
+                       tone_map.scene_threshold_low, 0, 0, 20.0),
+        OPT_FLOATRANGE("hdr-scene-threshold-high",
+                       tone_map.scene_threshold_high, 0, 0, 20.0),
         OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
         OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0),
         OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index 0bd5c57e8f..1b0994ac78 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -101,8 +101,8 @@ struct gl_tone_map_opts {
     float max_boost;
     int compute_peak;
     float decay_rate;
-    int scene_threshold_low;
-    int scene_threshold_high;
+    float scene_threshold_low;
+    float scene_threshold_high;
     float desat;
     float desat_exp;
     int gamut_warning; // bool
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 07ac0b940f..5fea739385 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -613,9 +613,10 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
     GLSLF("  average += %f * (cur - average);\n", decay);
 
     // Scene change hysteresis
-    GLSLF("  float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n",
-          (float) opts->scene_threshold_low / MP_REF_WHITE,
-          (float) opts->scene_threshold_high / MP_REF_WHITE);
+    float log_db = 10.0 / log(10.0);
+    GLSLF("  float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n",
+          opts->scene_threshold_low / log_db,
+          opts->scene_threshold_high / log_db);
     GLSL(    average = mix(average, cur, weight);)
 
     // Reset SSBO state for the next frame
-- 
cgit v1.2.3


From 8b563a034604ff5ab2ad92d12c63e806f45d1bb6 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Wed, 9 Jan 2019 17:14:19 +0100
Subject: vo_gpu: fix initial seeding of the peak detect ssbo

This solves some edge cases when using files with very weird metadata
(e.g. MaxCLL 10k and so forth). Instead of just blindly seeding it with
the tagged metadata, forcibly set the initial state from the detected
values.
---
 video/out/gpu/video.c         | 4 +---
 video/out/gpu/video_shaders.c | 8 ++++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 24e6990139..593f5fb9c1 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2499,9 +2499,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
             int32_t frame_sum;
             uint32_t frame_max;
             uint32_t counter;
-        } peak_ssbo = {
-            .average = { 0.25, src.sig_peak },
-        };
+        } peak_ssbo = {0};
 
         struct ra_buf_params params = {
             .type = RA_BUF_TYPE_SHADER_STORAGE,
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 5fea739385..b34aa90bfa 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -571,8 +571,10 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
                             const struct gl_tone_map_opts *opts)
 {
     // Update the sig_peak/sig_avg from the old SSBO state
-    GLSL(sig_avg  = max(1e-3, average.x);)
-    GLSL(sig_peak = max(1.00, average.y);)
+    GLSL(if (average.y > 0.0) {)
+    GLSL(    sig_avg  = max(1e-3, average.x);)
+    GLSL(    sig_peak = max(1.00, average.y);)
+    GLSL(})
 
     // Chosen to avoid overflowing on an 8K buffer
     const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
@@ -605,6 +607,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
     GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
     GLSLF("  cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
     GLSL(    cur.x = exp(cur.x);)
+    GLSL(    if (average.y == 0.0))
+    GLSL(        average = cur;)
 
     // Use an IIR low-pass filter to smooth out the detected values, with a
     // configurable decay rate based on the desired time constant (tau)
-- 
cgit v1.2.3


From e37c253b9207980a33ff3789b560efa3c4b6eb3e Mon Sep 17 00:00:00 2001
From: zc62 <chenzh1993@gmail.com>
Date: Mon, 4 Mar 2019 05:46:35 -0500
Subject: lcms: allow infinite contrast

Fixes #5980
---
 video/out/gpu/lcms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c
index bc76db965f..a8f277d3f0 100644
--- a/video/out/gpu/lcms.c
+++ b/video/out/gpu/lcms.c
@@ -83,7 +83,7 @@ const struct m_sub_options mp_icc_conf = {
         OPT_FLAG("icc-profile-auto", profile_auto, 0),
         OPT_STRING("icc-cache-dir", cache_dir, M_OPT_FILE),
         OPT_INT("icc-intent", intent, 0),
-        OPT_INTRANGE("icc-contrast", contrast, 0, 0, 1000000),
+        OPT_CHOICE_OR_INT("icc-contrast", contrast, 0, 0, 1000000, ({"inf", -1})),
         OPT_STRING_VALIDATE("icc-3dlut-size", size_str, 0, validate_3dlut_size_opt),
 
         OPT_REPLACED("3dlut-size", "icc-3dlut-size"),
-- 
cgit v1.2.3


From b3cbd4650984902548432f15be9f267f9cb2230e Mon Sep 17 00:00:00 2001
From: Bin Jin <bjin@ctrl-d.org>
Date: Thu, 7 Mar 2019 14:53:52 +0000
Subject: vo_gpu: make texture offset available to CHROMA hooks

Before this commit, the texture offset was set only after all source
textures were finalized, which meant CHROMA hooks were not able to align
with luma planes. This could be problematic for chroma prescalers
utilizing information from the luma plane.

Fix this by finding the reference texture early and setting the global
texture offset early.
---
 video/out/gpu/video.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 593f5fb9c1..416ba928d1 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2072,6 +2072,23 @@ static void pass_read_video(struct gl_video *p)
         }
     }
 
+    // The basic idea is we assume the rgb/luma texture is the "reference" and
+    // scale everything else to match, after all planes are finalized.
+    // We find the reference texture first, in order to maintain texture offset
+    // between hooks on different type of planes.
+    int reference_tex_num = 0;
+    for (int n = 0; n < 4; n++) {
+        switch (img[n].type) {
+        case PLANE_RGB:
+        case PLANE_XYZ:
+        case PLANE_LUMA: break;
+        default: continue;
+        }
+
+        reference_tex_num = n;
+        break;
+    }
+
     // Dispatch the hooks for all of these textures, saving and perhaps
     // modifying them in the process
     for (int n = 0; n < 4; n++) {
@@ -2086,26 +2103,18 @@ static void pass_read_video(struct gl_video *p)
         }
 
         img[n] = pass_hook(p, name, img[n], &offsets[n]);
+
+        if (reference_tex_num == n) {
+            // The reference texture is finalized now.
+            p->texture_w = img[n].w;
+            p->texture_h = img[n].h;
+            p->texture_offset = offsets[n];
+        }
     }
 
     // At this point all planes are finalized but they may not be at the
     // required size yet. Furthermore, they may have texture offsets that
-    // require realignment. For lack of something better to do, we assume
-    // the rgb/luma texture is the "reference" and scale everything else
-    // to match.
-    for (int n = 0; n < 4; n++) {
-        switch (img[n].type) {
-        case PLANE_RGB:
-        case PLANE_XYZ:
-        case PLANE_LUMA: break;
-        default: continue;
-        }
-
-        p->texture_w = img[n].w;
-        p->texture_h = img[n].h;
-        p->texture_offset = offsets[n];
-        break;
-    }
+    // require realignment.
 
     // Compute the reference rect
     struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h};
-- 
cgit v1.2.3


From 1d0349d3b5d9a263251fcb3b0d7e135d4731bfd0 Mon Sep 17 00:00:00 2001
From: Bin Jin <bjin@ctrl-d.org>
Date: Thu, 7 Mar 2019 15:06:24 +0000
Subject: vo_gpu: add two useful operators to user shader

The modulo operator can be used to check whether a size is a multiple of
a certain number.

The equality operator can be used to verify whether the sizes of
different textures align.
---
 video/out/gpu/user_shaders.c | 5 +++++
 video/out/gpu/user_shaders.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'video/out/gpu')

diff --git a/video/out/gpu/user_shaders.c b/video/out/gpu/user_shaders.c
index 446941b03f..0613eb93f6 100644
--- a/video/out/gpu/user_shaders.c
+++ b/video/out/gpu/user_shaders.c
@@ -16,6 +16,7 @@
  */
 
 #include <assert.h>
+#include <math.h>
 
 #include "common/msg.h"
 #include "misc/ctype.h"
@@ -52,9 +53,11 @@ static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE])
         case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue;
         case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue;
         case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue;
+        case '%': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MOD; continue;
         case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue;
         case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT;  continue;
         case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT;  continue;
+        case '=': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_EQ;  continue;
         }
 
         if (mp_isdigit(word.start[0])) {
@@ -118,8 +121,10 @@ bool eval_szexpr(struct mp_log *log, void *priv,
             case SZEXP_OP_SUB: res = op1 - op2; break;
             case SZEXP_OP_MUL: res = op1 * op2; break;
             case SZEXP_OP_DIV: res = op1 / op2; break;
+            case SZEXP_OP_MOD: res = fmodf(op1, op2); break;
             case SZEXP_OP_GT:  res = op1 > op2; break;
             case SZEXP_OP_LT:  res = op1 < op2; break;
+            case SZEXP_OP_EQ:  res = op1 == op2; break;
             default: abort();
             }
 
diff --git a/video/out/gpu/user_shaders.h b/video/out/gpu/user_shaders.h
index 8d8cc6bde0..a477e3ce3d 100644
--- a/video/out/gpu/user_shaders.h
+++ b/video/out/gpu/user_shaders.h
@@ -30,9 +30,11 @@ enum szexp_op {
     SZEXP_OP_SUB,
     SZEXP_OP_MUL,
     SZEXP_OP_DIV,
+    SZEXP_OP_MOD,
     SZEXP_OP_NOT,
     SZEXP_OP_GT,
     SZEXP_OP_LT,
+    SZEXP_OP_EQ,
 };
 
 enum szexp_tag {
-- 
cgit v1.2.3