From c335e84230916d7d7a38288031516e8b2ec1c36b Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Sat, 10 Jun 2017 14:01:25 +0200
Subject: video: refactor HDR implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

List of changes:

1. Kill nom_peak, since it's a pointless non-field that stores nothing
   of value and is _always_ derived from ref_white anyway.

2. Kill ref_white/--target-brightness, because the only case it really
   existed for (PQ) actually doesn't need to be this general: According
   to ITU-R BT.2100, PQ *always* assumes a reference monitor with a
   white point of 100 cd/m².

3. Improve documentation and comments surrounding this stuff.
4. Clean up some of the code in general. Move stuff where it belongs.
---
 video/out/opengl/video.c         | 40 +++----------------
 video/out/opengl/video_shaders.c | 84 ++++++++++++++++++++++------------------
 video/out/opengl/video_shaders.h |  3 +-
 3 files changed, 54 insertions(+), 73 deletions(-)

(limited to 'video/out')

diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 51d484d078..2cf5a41c0b 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -290,7 +290,6 @@ static const struct gl_video_opts gl_video_opts_def = {
     .alpha_mode = ALPHA_BLEND_TILES,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
-    .target_brightness = 250,
     .hdr_tone_mapping = TONE_MAPPING_MOBIUS,
     .tone_mapping_param = NAN,
     .early_flush = -1,
@@ -325,7 +324,6 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLAG("gamma-auto", gamma_auto, 0),
         OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
         OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
-        OPT_INTRANGE("target-brightness", target_brightness, 0, 1, 100000),
         OPT_CHOICE("hdr-tone-mapping", hdr_tone_mapping, 0,
                    ({"clip",     TONE_MAPPING_CLIP},
                     {"mobius",   TONE_MAPPING_MOBIUS},
@@ -2053,17 +2051,11 @@ static void pass_scale_main(struct gl_video *p)
 // by previous passes (i.e. linear scaling)
 static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd)
 {
-    struct mp_colorspace ref = src;
-
-    if (p->use_linear && !osd)
-        src.gamma = MP_CSP_TRC_LINEAR;
-
     // Figure out the target color space from the options, or auto-guess if
     // none were set
     struct mp_colorspace dst = {
         .gamma = p->opts.target_trc,
         .primaries = p->opts.target_prim,
-        .nom_peak = mp_csp_trc_nom_peak(p->opts.target_trc, p->opts.target_brightness),
     };
 
     if (p->use_lut_3d) {
@@ -2095,14 +2087,14 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         // this as the default output color space.
         dst.primaries = MP_CSP_PRIM_BT_709;
 
-        if (ref.primaries == MP_CSP_PRIM_BT_601_525 ||
-            ref.primaries == MP_CSP_PRIM_BT_601_625)
+        if (src.primaries == MP_CSP_PRIM_BT_601_525 ||
+            src.primaries == MP_CSP_PRIM_BT_601_625)
         {
             // Since we auto-pick BT.601 and BT.709 based on the dimensions,
             // combined with the fact that they're very similar to begin with,
             // and to avoid confusing the average user, just don't adapt BT.601
             // content automatically at all.
-            dst.primaries = ref.primaries;
+            dst.primaries = src.primaries;
         }
     }
 
@@ -2112,7 +2104,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
         // altogether by default. The only exceptions to this rule apply to
         // very unusual TRCs, which even hardcode technoluddites would probably
         // not enjoy viewing unaltered.
-        dst.gamma = ref.gamma;
+        dst.gamma = src.gamma;
 
         // Avoid outputting linear light or HDR content "by default". For these
         // just pick gamma 2.2 as a default, since it's a good estimate for
@@ -2121,30 +2113,9 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
             dst.gamma = MP_CSP_TRC_GAMMA22;
     }
 
-    // For the src peaks, the correct brightness metadata may be present for
-    // sig_peak, nom_peak, both, or neither. To handle everything in a generic
-    // way, it's important to never automatically infer a sig_peak that is
-    // below the nom_peak (since we don't know what bits the image contains,
-    // doing so would potentially badly clip). The only time in which this
-    // may be the case is when the mastering metadata explicitly says so, i.e.
-    // the sig_peak was already set. So to simplify the logic as much as
-    // possible, make sure the nom_peak is present and correct first, and just
-    // set sig_peak = nom_peak if missing.
-    if (!src.nom_peak) {
-        // For display-referred colorspaces, we treat it as relative to
-        // target_brightness
-        src.nom_peak = mp_csp_trc_nom_peak(src.gamma, p->opts.target_brightness);
-    }
-
-    if (!src.sig_peak)
-        src.sig_peak = src.nom_peak;
-
-    MP_DBG(p, "HDR src nom: %f sig: %f, dst: %f\n",
-           src.nom_peak, src.sig_peak, dst.nom_peak);
-
     // Adapt from src to dst as necessary
     pass_color_map(p->sc, src, dst, p->opts.hdr_tone_mapping,
-                   p->opts.tone_mapping_param);
+                   p->opts.tone_mapping_param, p->use_linear && !osd);
 
     if (p->use_lut_3d) {
         gl_sc_uniform_tex(p->sc, "lut_3d", GL_TEXTURE_3D, p->lut_3d_texture);
@@ -3089,7 +3060,6 @@ static void check_gl_features(struct gl_video *p)
             .temporal_dither_period = p->opts.temporal_dither_period,
             .tex_pad_x = p->opts.tex_pad_x,
             .tex_pad_y = p->opts.tex_pad_y,
-            .target_brightness = p->opts.target_brightness,
             .hdr_tone_mapping = p->opts.hdr_tone_mapping,
             .tone_mapping_param = p->opts.tone_mapping_param,
             .early_flush = p->opts.early_flush,
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 95ac712f05..4bacb12532 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -233,16 +233,18 @@ static const float B67_A = 0.17883277,
 // Common constants for Panasonic V-Log
 static const float VLOG_B = 0.00873,
                    VLOG_C = 0.241514,
-                   VLOG_D = 0.598206,
-                   VLOG_R = 46.085527; // nominal peak
+                   VLOG_D = 0.598206;
 
-// Linearize (expand), given a TRC as input. This corresponds to the EOTF
-// in ITU-R terminology.
+// Linearize (expand), given a TRC as input. In essence, this is the ITU-R
+// EOTF, calculated on an idealized (reference) monitor with a white point of
+// MP_REF_WHITE and infinite contrast.
 void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
 {
     if (trc == MP_CSP_TRC_LINEAR)
         return;
 
+    GLSLF("// linearize\n");
+
     // Note that this clamp may technically violate the definition of
     // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
     // displayed on the display where such would be possible. That said, the
@@ -257,7 +259,6 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
                              lessThan(vec3(0.04045), color.rgb));)
         break;
     case MP_CSP_TRC_BT_1886:
-        // We don't have an actual black point, so we assume a perfect display
         GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
         break;
     case MP_CSP_TRC_GAMMA18:
@@ -280,17 +281,15 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
               "             / (vec3(%f) - vec3(%f) * color.rgb);\n",
               HDR_C1, HDR_C2, HDR_C3);
         GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", HDR_M1);
+        // PQ's output range is 0-10000, but we need it to be relative to
+        // MP_REF_WHITE instead, so rescale
+        GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
         break;
     case MP_CSP_TRC_ARIB_STD_B67:
         GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
               "                exp((color.rgb - vec3(%f)) / vec3(%f)) + vec3(%f),\n"
               "                lessThan(vec3(0.5), color.rgb));\n",
               B67_C, B67_A, B67_B);
-        // Since the ARIB function's signal value of 1.0 corresponds to
-        // a peak of 12.0, we need to renormalize to prevent GL textures
-        // from clipping. (In general, mpv's internal conversions always
-        // assume 1.0 is the maximum brightness, not the reference peak)
-        GLSL(color.rgb /= vec3(12.0);)
         break;
     case MP_CSP_TRC_V_LOG:
         GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) / vec3(5.6), \n"
@@ -298,23 +297,27 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
               "              - vec3(%f),                              \n"
               "    lessThanEqual(vec3(0.181), color.rgb));            \n",
               VLOG_D, VLOG_C, VLOG_B);
-        // Same deal as with the B67 function, renormalize to texture range
-        GLSLF("color.rgb /= vec3(%f);\n", VLOG_R);
-        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
         break;
     default:
         abort();
     }
+
+    // Rescale to prevent clipping on non-float textures
+    GLSLF("color.rgb /= vec3(%f);\n", mp_trc_nom_peak(trc));
 }
 
 // Delinearize (compress), given a TRC as output. This corresponds to the
-// inverse EOTF (not the OETF) in ITU-R terminology.
+// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a
+// reference monitor.
 void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
 {
     if (trc == MP_CSP_TRC_LINEAR)
         return;
 
+    GLSLF("// delinearize\n");
     GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+    GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc));
+
     switch (trc) {
     case MP_CSP_TRC_SRGB:
         GLSL(color.rgb = mix(color.rgb * vec3(12.92),
@@ -340,6 +343,7 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
                              lessThanEqual(vec3(0.001953), color.rgb));)
         break;
     case MP_CSP_TRC_SMPTE_ST2084:
+        GLSLF("color.rgb /= vec3(%f);\n", 10000 / MP_REF_WHITE);
         GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", HDR_M1);
         GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
               "             / (vec3(1.0) + vec3(%f) * color.rgb);\n",
@@ -347,14 +351,12 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
         GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", HDR_M2);
         break;
     case MP_CSP_TRC_ARIB_STD_B67:
-        GLSL(color.rgb *= vec3(12.0);)
         GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
               "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
               "                lessThan(vec3(1.0), color.rgb));\n",
               B67_A, B67_B, B67_C);
         break;
     case MP_CSP_TRC_V_LOG:
-        GLSLF("color.rgb *= vec3(%f);\n", VLOG_R);
         GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),   \n"
               "                vec3(%f) * log(color.rgb + vec3(%f))   \n"
               "                    + vec3(%f),                        \n"
@@ -429,46 +431,54 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
     }
 }
 
-// Map colors from one source space to another. These source spaces
-// must be known (i.e. not MP_CSP_*_AUTO), as this function won't perform
-// any auto-guessing.
+// Map colors from one source space to another. These source spaces must be
+// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
+// auto-guessing. If is_linear is true, we assume the input has already been
+// linearized (e.g. for linear-scaling)
 void pass_color_map(struct gl_shader_cache *sc,
                     struct mp_colorspace src, struct mp_colorspace dst,
-                    enum tone_mapping algo, float tone_mapping_param)
+                    enum tone_mapping algo, float tone_mapping_param,
+                    bool is_linear)
 {
     GLSLF("// color mapping\n");
 
+    // Compute the highest encodable level
+    float src_range = mp_trc_nom_peak(src.gamma),
+          dst_range = mp_trc_nom_peak(dst.gamma);
+
     // All operations from here on require linear light as a starting point,
     // so we linearize even if src.gamma == dst.gamma when one of the other
     // operations needs it
     bool need_gamma = src.gamma != dst.gamma ||
                       src.primaries != dst.primaries ||
-                      src.nom_peak != dst.nom_peak ||
-                      src.sig_peak > dst.nom_peak;
+                      src_range != dst_range ||
+                      src.sig_peak > dst_range;
 
-    if (need_gamma)
+    if (need_gamma && !is_linear) {
         pass_linearize(sc, src.gamma);
+        is_linear= true;
+    }
 
     // NOTE: When src.gamma = MP_CSP_TRC_ARIB_STD_B67, we would technically
     // need to apply the reference OOTF as part of the EOTF (which is what we
     // implement with pass_linearize), since HLG considers OOTF to be part of
-    // the display's EOTF (as opposed to the camera's OETF). But since this is
-    // stupid, complicated, arbitrary, and more importantly depends on the
-    // target display's signal peak (which is != the nom_peak in the case of
-    // HDR displays, and mpv already has enough target-specific display
-    // options), we just ignore its implementation entirely. (Plus, it doesn't
-    // even really make sense with tone mapping to begin with.) But just in
-    // case somebody ends up complaining about HLG looking different from a
+    // the display's EOTF (as opposed to the camera's OETF) - although arguably
+    // in our case this would be part of the ICC profile, not mpv. Either way,
+    // in case somebody ends up complaining about HLG looking different from a
     // reference HLG display, this comment might be why.
 
-    // Stretch the signal value to renormalize to the dst nominal peak
-    if (src.nom_peak != dst.nom_peak)
-        GLSLF("color.rgb *= vec3(%f);\n", src.nom_peak / dst.nom_peak);
+    // Rescale the signal to compensate for differences in the encoding range
+    // and reference white level. This is necessary because of how mpv encodes
+    // brightness in textures.
+    if (src_range != dst_range) {
+        GLSLF("// rescale value range;\n");
+        GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range);
+    }
 
     // Tone map to prevent clipping when the source signal peak exceeds the
-    // encodable range.
-    if (src.sig_peak > dst.nom_peak)
-        pass_tone_map(sc, src.sig_peak / dst.nom_peak, algo, tone_mapping_param);
+    // encodable range
+    if (src.sig_peak > dst_range)
+        pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param);
 
     // Adapt to the right colorspace if necessary
     if (src.primaries != dst.primaries) {
@@ -480,7 +490,7 @@ void pass_color_map(struct gl_shader_cache *sc,
         GLSL(color.rgb = cms_matrix * color.rgb;)
     }
 
-    if (need_gamma)
+    if (is_linear)
         pass_delinearize(sc, dst.gamma);
 }
 
diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h
index 3bc2f210b8..076a835754 100644
--- a/video/out/opengl/video_shaders.h
+++ b/video/out/opengl/video_shaders.h
@@ -40,7 +40,8 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
 
 void pass_color_map(struct gl_shader_cache *sc,
                     struct mp_colorspace src, struct mp_colorspace dst,
-                    enum tone_mapping algo, float tone_mapping_param);
+                    enum tone_mapping algo, float tone_mapping_param,
+                    bool skip_linearization);
 
 void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
                         AVLFG *lfg);
-- 
cgit v1.2.3