summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2018-02-03 14:45:01 +0100
committerKevin Mitchell <kevmitch@gmail.com>2018-02-05 23:11:18 -0800
commite3d93fde2f60c3eef9673e73d7fe156f27ee715f (patch)
tree57c773ce0376cdaf38d164580fc1b230848248a0
parent0870859e3d3658ae7cc69201cfc4bbe41d93fbf5 (diff)
downloadmpv-e3d93fde2f60c3eef9673e73d7fe156f27ee715f.tar.bz2
mpv-e3d93fde2f60c3eef9673e73d7fe156f27ee715f.tar.xz
vo_gpu: port HDR tone mapping algorithm from libplacebo
The current peak detection algorithm was very bugged (which contributed to the excessive cross-frame flicker without long normalization) and also didn't take into account the frame average brightness level. The new algorithm both takes into account frame average brightness (in addition to peak brightness), and also computes the values in a more stable/correct way. (The old path was basically undefined behavior.) In addition to improving the algorithm, we also switch to hable tone mapping by default, and try to enable peak computation automatically whenever possible (compute shaders + SSBOs supported). We also make the desaturation milder, after extensive testing during libplacebo development. I also had to compensate a bit for the representational differences between mpv and libplacebo (libplacebo treats 1.0 as the reference peak, but mpv treats it as the nominal peak), but it shouldn't have caused any problems. This is still not quite the same as libplacebo, since libplacebo also allows tagging the desired scene average brightness on the output, and it also supports reading the scene average brightness from static metadata (MaxFALL) where available. But those changes are a bit more involved. It's possible we could also read this from metadata in the future, but we have problems communicating with AVFrames as it is and I don't want to touch the mpv colorimetry structs for the time being.
-rw-r--r--DOCS/man/options.rst27
-rw-r--r--video/out/gpu/video.c47
-rw-r--r--video/out/gpu/video.h2
-rw-r--r--video/out/gpu/video_shaders.c128
4 files changed, 123 insertions, 81 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index b0e4f8a9a9..44aff271d3 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -5063,7 +5063,7 @@ The following video options are currently all specific to ``--vo=gpu`` and
for in-range material as much as possible. Use this when you care about
color accuracy more than detail preservation. This is somewhere in
between ``clip`` and ``reinhard``, depending on the value of
- ``--tone-mapping-param``. (default)
+ ``--tone-mapping-param``.
reinhard
Reinhard tone mapping algorithm. Very simple continuous curve.
Preserves overall image brightness but uses nonlinear contrast, which
@@ -5074,7 +5074,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
desaturating everything. Developed by John Hable for use in video
games. Use this when you care about detail preservation more than
color/brightness accuracy. This is roughly equivalent to
- ``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``.
+ ``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``. If possible,
+ you should also enable ``--hdr-compute-peak`` for the best results.
+ (Default)
gamma
Fits a logarithmic transfer between the tone curves.
linear
@@ -5103,13 +5105,15 @@ The following video options are currently all specific to ``--vo=gpu`` and
linear
Specifies the scale factor to use while stretching. Defaults to 1.0.
-``--hdr-compute-peak``
- Compute the HDR peak per-frame of relying on tagged metadata. These values
- are averaged over local regions as well as over several frames to prevent
- the value from jittering around too much. This option basically gives you
- dynamic, per-scene tone mapping. Requires compute shaders, which is a
- fairly recent OpenGL feature, and will probably also perform horribly on
- some drivers, so enable at your own risk.
+``--hdr-compute-peak=<auto|yes|no>``
+ Compute the HDR peak and frame average brightness per-frame instead of
+ relying on tagged metadata. These values are averaged over local regions as
+ well as over several frames to prevent the value from jittering around too
+ much. This option basically gives you dynamic, per-scene tone mapping.
+ Requires compute shaders, which is a fairly recent OpenGL feature, and will
+ probably also perform horribly on some drivers, so enable at your own risk.
+ The special value ``auto`` (default) will enable HDR peak computation
+ automatically if compute shaders and SSBOs are supported.
``--tone-mapping-desaturate=<value>``
Apply desaturation for highlights. The parameter essentially controls the
@@ -5119,8 +5123,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
into white instead. This makes images feel more natural, at the cost of
reducing information about out-of-range colors.
- The default of 1.0 provides a good balance that roughly matches the look
- and feel of the ACES ODT curves. A setting of 0.0 disables this option.
+ The default of 0.5 provides a good balance. This value is weaker than the
+ ACES ODT curves' recommendation, but works better for most content in
+ practice. A setting of 0.0 disables this option.
``--gamut-warning``
If enabled, mpv will mark all clipped/out-of-gamut pixels that exceed a
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 24b14c537b..9bf7baeb77 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -313,9 +313,9 @@ static const struct gl_video_opts gl_video_opts_def = {
.alpha_mode = ALPHA_BLEND_TILES,
.background = {0, 0, 0, 255},
.gamma = 1.0f,
- .tone_mapping = TONE_MAPPING_MOBIUS,
+ .tone_mapping = TONE_MAPPING_HABLE,
.tone_mapping_param = NAN,
- .tone_mapping_desat = 1.0,
+ .tone_mapping_desat = 0.5,
.early_flush = -1,
.hwdec_interop = "auto",
};
@@ -358,7 +358,10 @@ const struct m_sub_options gl_video_conf = {
{"hable", TONE_MAPPING_HABLE},
{"gamma", TONE_MAPPING_GAMMA},
{"linear", TONE_MAPPING_LINEAR})),
- OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0),
+ OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0,
+ ({"auto", 0},
+ {"yes", 1},
+ {"no", -1})),
OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
OPT_FLAG("gamut-warning", gamut_warning, 0),
@@ -2442,20 +2445,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.gamma = MP_CSP_TRC_GAMMA22;
}
- bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
+ bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma);
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
- unsigned int sig_peak_raw;
- unsigned int index;
+ unsigned int counter;
+ unsigned int frame_idx;
+ unsigned int frame_num;
unsigned int frame_max[PEAK_DETECT_FRAMES+1];
+ unsigned int frame_sum[PEAK_DETECT_FRAMES+1];
+ unsigned int total_max;
+ unsigned int total_sum;
} peak_ssbo = {0};
- // Prefill with safe values
- int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma);
- peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe;
- for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++)
- peak_ssbo.frame_max[i] = safe;
-
struct ra_buf_params params = {
.type = RA_BUF_TYPE_SHADER_STORAGE,
.size = sizeof(peak_ssbo),
@@ -2465,7 +2466,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
p->hdr_peak_ssbo = ra_buf_create(ra, &params);
if (!p->hdr_peak_ssbo) {
MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
- detect_peak = (p->opts.compute_hdr_peak = false);
+ detect_peak = false;
+ p->opts.compute_hdr_peak = -1;
}
}
@@ -2473,9 +2475,15 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_describe(p, "detect HDR peak");
pass_is_compute(p, 8, 8); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
- "uint sig_peak_raw;"
- "uint index;"
- "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1
+ "uint counter;"
+ "uint frame_idx;"
+ "uint frame_num;"
+ "uint frame_max[%d];"
+ "uint frame_sum[%d];"
+ "uint total_max;"
+ "uint total_sum;",
+ PEAK_DETECT_FRAMES + 1,
+ PEAK_DETECT_FRAMES + 1
);
}
@@ -3504,9 +3512,10 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
- if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) {
- p->opts.compute_hdr_peak = 0;
- MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
+ if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak >= 0) {
+ int msgl = p->opts.compute_hdr_peak == 1 ? MSGL_WARN : MSGL_V;
+ MP_MSG(p, msgl, "Disabling HDR peak computation (no compute shaders).\n");
+ p->opts.compute_hdr_peak = -1;
}
}
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index 78f8828f99..71666059f9 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -96,7 +96,7 @@ enum tone_mapping {
};
// How many frames to average over for HDR peak detection
-#define PEAK_DETECT_FRAMES 100
+#define PEAK_DETECT_FRAMES 20
struct gl_video_opts {
int dumb_mode;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 3e71c31369..8e33255390 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -553,13 +553,63 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
default:
abort();
}
+}
- GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
+// Average light level for SDR signals. This is equal to a signal level of 0.5
+// under a typical presentation gamma of about 2.0.
+static const float sdr_avg = 0.25;
+
+static void hdr_update_peak(struct gl_shader_cache *sc)
+{
+ // For performance, we want to do as few atomic operations on global
+ // memory as possible, so use an atomic in shmem for the work group.
+ GLSLH(shared uint wg_sum;);
+ GLSL(wg_sum = 0;)
+
+ // Have each thread update the work group sum with the local value
+ GLSL(barrier();)
+ GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE);
+
+ // Have one thread per work group update the global atomics. We use the
+ // work group average even for the global sum, to make the values slightly
+ // more stable and smooth out tiny super-highlights.
+ GLSL(memoryBarrierShared();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0) {)
+ GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+ GLSL( atomicMax(frame_max[frame_idx], wg_avg);)
+ GLSL( atomicAdd(frame_sum[frame_idx], wg_avg);)
+ GLSL(})
+
+ // Update the sig_peak/sig_avg from the old SSBO state
+ GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
+ GLSL(if (frame_num > 0) {)
+ GLSLF(" float peak = float(total_max) / (%f * float(frame_num));\n", MP_REF_WHITE);
+ GLSLF(" float avg = float(total_sum) / (%f * float(frame_num * num_wg));\n", MP_REF_WHITE);
+ GLSLF(" sig_peak = max(1.0, peak);\n");
+ GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg);
+ GLSL(});
+
+ // Finally, to update the global state, we increment a counter per dispatch
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
+ GLSL( counter = 0;)
+ // Add the current frame, then subtract and reset the next frame
+ GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
+ GLSL( total_max += frame_max[frame_idx] - frame_max[next];)
+ GLSL( total_sum += frame_sum[frame_idx] - frame_sum[next];)
+ GLSL( frame_max[next] = frame_sum[next] = 0;)
+ // Update the index and count
+ GLSL( frame_idx = next;)
+ GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
+ GLSL(})
}
// Tone map from a known peak brightness to the range [0,1]. If ref_peak
// is 0, we will use peak detection instead
-static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
+static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
+ float src_peak, float dst_range,
enum tone_mapping algo, float param, float desat)
{
GLSLF("// HDR tone mapping\n");
@@ -568,6 +618,16 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// sure to reduce the value range as far as necessary to keep the entire
// signal in range, so tone map based on the brightest component.
GLSL(float sig = max(max(color.r, color.g), color.b);)
+ GLSLF("float sig_peak = %f;\n", src_peak);
+ GLSLF("float sig_avg = %f;\n", sdr_avg);
+
+ // Rescale the variables in order to bring it into a representation where
+ // 1.0 represents the dst_peak. This is because all of the tone mapping
+ // algorithms are defined in such a way that they map to the range [0.0, 1.0].
+ if (dst_range > 1.0) {
+ GLSLF("sig *= %f;\n", 1.0 / dst_range);
+ GLSLF("sig_peak *= %f;\n", 1.0 / dst_range);
+ }
// Desaturate the color using a coefficient dependent on the signal
if (desat > 0) {
@@ -578,41 +638,14 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig`
}
- if (!ref_peak) {
- // For performance, we want to do as few atomic operations on global
- // memory as possible, so use an atomic in shmem for the work group.
- // We also want slightly more stable values, so use the group average
- // instead of the group max
- GLSLHF("shared uint group_sum = 0;\n");
- GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE);
-
- // Have one thread in each work group update the frame maximum
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSL(if (gl_LocalInvocationIndex == 0))
- GLSL(atomicMax(frame_max[index], group_sum /
- (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
-
- // Finally, have one thread per invocation update the total maximum
- // and advance the index
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
- GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
- GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
- GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
- GLSL(index = next;)
- GLSL(})
-
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
- MP_REF_WHITE * PEAK_DETECT_FRAMES);
- } else {
- GLSLHF("const float sig_peak = %f;\n", ref_peak);
- }
+ if (detect_peak)
+ hdr_update_peak(sc);
GLSL(float sig_orig = sig;)
+ GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg);
+ GLSL(sig *= slope;)
+ GLSL(sig_peak *= slope;)
+
switch (algo) {
case TONE_MAPPING_CLIP:
GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
@@ -668,6 +701,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// Apply the computed scale factor to the color, linearly to prevent
// discoloration
+ GLSL(sig = min(sig, 1.0);)
GLSL(color.rgb *= sig / sig_orig;)
}
@@ -689,7 +723,6 @@ void pass_color_map(struct gl_shader_cache *sc,
// Compute the highest encodable level
float src_range = mp_trc_nom_peak(src.gamma),
dst_range = mp_trc_nom_peak(dst.gamma);
- float ref_peak = src.sig_peak / dst_range;
// Some operations need access to the video's luma coefficients, so make
// them available
@@ -709,20 +742,13 @@ void pass_color_map(struct gl_shader_cache *sc,
src.light != dst.light;
if (need_gamma && !is_linear) {
+ // We also pull it up so that 1.0 is the reference white
pass_linearize(sc, src.gamma);
- is_linear= true;
+ is_linear = true;
}
if (src.light != dst.light)
- pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma));
-
- // Rescale the signal to compensate for differences in the encoding range
- // and reference white level. This is necessary because of how mpv encodes
- // brightness in textures.
- if (src_range != dst_range) {
- GLSLF("// rescale value range;\n");
- GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range);
- }
+ pass_ootf(sc, src.light, src_range);
// Adapt to the right colorspace if necessary
if (src.primaries != dst.primaries) {
@@ -734,18 +760,20 @@ void pass_color_map(struct gl_shader_cache *sc,
GLSL(color.rgb = cms_matrix * color.rgb;)
// Since this can reduce the gamut, figure out by how much
for (int c = 0; c < 3; c++)
- ref_peak = MPMAX(ref_peak, m[c][c]);
+ src.sig_peak = MPMAX(src.sig_peak, m[c][c]);
}
// Tone map to prevent clipping when the source signal peak exceeds the
// encodable range or we've reduced the gamut
- if (ref_peak > 1) {
- pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo,
+ if (src.sig_peak > dst_range) {
+ GLSLF("color.rgb *= vec3(%f);\n", src_range);
+ pass_tone_map(sc, detect_peak, src.sig_peak, dst_range, algo,
tone_mapping_param, tone_mapping_desat);
+ GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
}
if (src.light != dst.light)
- pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma));
+ pass_inverse_ootf(sc, dst.light, dst_range);
// Warn for remaining out-of-gamut colors is enabled
if (gamut_warning) {