summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--DOCS/man/options.rst27
-rw-r--r--video/out/gpu/video.c47
-rw-r--r--video/out/gpu/video.h2
-rw-r--r--video/out/gpu/video_shaders.c128
4 files changed, 123 insertions, 81 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index b0e4f8a9a9..44aff271d3 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -5063,7 +5063,7 @@ The following video options are currently all specific to ``--vo=gpu`` and
for in-range material as much as possible. Use this when you care about
color accuracy more than detail preservation. This is somewhere in
between ``clip`` and ``reinhard``, depending on the value of
- ``--tone-mapping-param``. (default)
+ ``--tone-mapping-param``.
reinhard
Reinhard tone mapping algorithm. Very simple continuous curve.
Preserves overall image brightness but uses nonlinear contrast, which
@@ -5074,7 +5074,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
desaturating everything. Developed by John Hable for use in video
games. Use this when you care about detail preservation more than
color/brightness accuracy. This is roughly equivalent to
- ``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``.
+ ``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``. If possible,
+ you should also enable ``--hdr-compute-peak`` for the best results.
+ (default)
gamma
Fits a logarithmic transfer between the tone curves.
linear
@@ -5103,13 +5105,15 @@ The following video options are currently all specific to ``--vo=gpu`` and
linear
Specifies the scale factor to use while stretching. Defaults to 1.0.
-``--hdr-compute-peak``
- Compute the HDR peak per-frame of relying on tagged metadata. These values
- are averaged over local regions as well as over several frames to prevent
- the value from jittering around too much. This option basically gives you
- dynamic, per-scene tone mapping. Requires compute shaders, which is a
- fairly recent OpenGL feature, and will probably also perform horribly on
- some drivers, so enable at your own risk.
+``--hdr-compute-peak=<auto|yes|no>``
+ Compute the HDR peak and frame average brightness per-frame instead of
+ relying on tagged metadata. These values are averaged over local regions as
+ well as over several frames to prevent the value from jittering around too
+ much. This option basically gives you dynamic, per-scene tone mapping.
+ Requires compute shaders, which is a fairly recent OpenGL feature, and will
+ probably also perform horribly on some drivers, so enable at your own risk.
+ The special value ``auto`` (default) will enable HDR peak computation
+ automatically if compute shaders and SSBOs are supported.
``--tone-mapping-desaturate=<value>``
Apply desaturation for highlights. The parameter essentially controls the
@@ -5119,8 +5123,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
into white instead. This makes images feel more natural, at the cost of
reducing information about out-of-range colors.
- The default of 1.0 provides a good balance that roughly matches the look
- and feel of the ACES ODT curves. A setting of 0.0 disables this option.
+ The default of 0.5 provides a good balance. This value is weaker than the
+ ACES ODT curves' recommendation, but works better for most content in
+ practice. A setting of 0.0 disables this option.
``--gamut-warning``
If enabled, mpv will mark all clipped/out-of-gamut pixels that exceed a
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 24b14c537b..9bf7baeb77 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -313,9 +313,9 @@ static const struct gl_video_opts gl_video_opts_def = {
.alpha_mode = ALPHA_BLEND_TILES,
.background = {0, 0, 0, 255},
.gamma = 1.0f,
- .tone_mapping = TONE_MAPPING_MOBIUS,
+ .tone_mapping = TONE_MAPPING_HABLE,
.tone_mapping_param = NAN,
- .tone_mapping_desat = 1.0,
+ .tone_mapping_desat = 0.5,
.early_flush = -1,
.hwdec_interop = "auto",
};
@@ -358,7 +358,10 @@ const struct m_sub_options gl_video_conf = {
{"hable", TONE_MAPPING_HABLE},
{"gamma", TONE_MAPPING_GAMMA},
{"linear", TONE_MAPPING_LINEAR})),
- OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0),
+ OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0,
+ ({"auto", 0},
+ {"yes", 1},
+ {"no", -1})),
OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
OPT_FLAG("gamut-warning", gamut_warning, 0),
@@ -2442,20 +2445,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.gamma = MP_CSP_TRC_GAMMA22;
}
- bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
+ bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma);
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
- unsigned int sig_peak_raw;
- unsigned int index;
+ unsigned int counter;
+ unsigned int frame_idx;
+ unsigned int frame_num;
unsigned int frame_max[PEAK_DETECT_FRAMES+1];
+ unsigned int frame_sum[PEAK_DETECT_FRAMES+1];
+ unsigned int total_max;
+ unsigned int total_sum;
} peak_ssbo = {0};
- // Prefill with safe values
- int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma);
- peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe;
- for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++)
- peak_ssbo.frame_max[i] = safe;
-
struct ra_buf_params params = {
.type = RA_BUF_TYPE_SHADER_STORAGE,
.size = sizeof(peak_ssbo),
@@ -2465,7 +2466,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
p->hdr_peak_ssbo = ra_buf_create(ra, &params);
if (!p->hdr_peak_ssbo) {
MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
- detect_peak = (p->opts.compute_hdr_peak = false);
+ detect_peak = false;
+ p->opts.compute_hdr_peak = -1;
}
}
@@ -2473,9 +2475,15 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_describe(p, "detect HDR peak");
pass_is_compute(p, 8, 8); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
- "uint sig_peak_raw;"
- "uint index;"
- "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1
+ "uint counter;"
+ "uint frame_idx;"
+ "uint frame_num;"
+ "uint frame_max[%d];"
+ "uint frame_sum[%d];"
+ "uint total_max;"
+ "uint total_sum;",
+ PEAK_DETECT_FRAMES + 1,
+ PEAK_DETECT_FRAMES + 1
);
}
@@ -3504,9 +3512,10 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
- if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) {
- p->opts.compute_hdr_peak = 0;
- MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
+ if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak >= 0) {
+ int msgl = p->opts.compute_hdr_peak == 1 ? MSGL_WARN : MSGL_V;
+ MP_MSG(p, msgl, "Disabling HDR peak computation (no compute shaders).\n");
+ p->opts.compute_hdr_peak = -1;
}
}
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index 78f8828f99..71666059f9 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -96,7 +96,7 @@ enum tone_mapping {
};
// How many frames to average over for HDR peak detection
-#define PEAK_DETECT_FRAMES 100
+#define PEAK_DETECT_FRAMES 20
struct gl_video_opts {
int dumb_mode;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 3e71c31369..8e33255390 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -553,13 +553,63 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
default:
abort();
}
+}
- GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
+// Average light level for SDR signals. This is equal to a signal level of 0.5
+// under a typical presentation gamma of about 2.0.
+static const float sdr_avg = 0.25;
+
+static void hdr_update_peak(struct gl_shader_cache *sc)
+{
+ // For performance, we want to do as few atomic operations on global
+ // memory as possible, so use an atomic in shmem for the work group.
+ GLSLH(shared uint wg_sum;);
+ GLSL(wg_sum = 0;)
+
+ // Have each thread update the work group sum with the local value
+ GLSL(barrier();)
+ GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE);
+
+ // Have one thread per work group update the global atomics. We use the
+ // work group average even for the global sum, to make the values slightly
+ // more stable and smooth out tiny super-highlights.
+ GLSL(memoryBarrierShared();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0) {)
+ GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+ GLSL( atomicMax(frame_max[frame_idx], wg_avg);)
+ GLSL( atomicAdd(frame_sum[frame_idx], wg_avg);)
+ GLSL(})
+
+ // Update the sig_peak/sig_avg from the old SSBO state
+ GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
+ GLSL(if (frame_num > 0) {)
+ GLSLF(" float peak = float(total_max) / (%f * float(frame_num));\n", MP_REF_WHITE);
+ GLSLF(" float avg = float(total_sum) / (%f * float(frame_num * num_wg));\n", MP_REF_WHITE);
+ GLSLF(" sig_peak = max(1.0, peak);\n");
+ GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg);
+ GLSL(});
+
+ // Finally, to update the global state, we increment a counter per dispatch
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
+ GLSL( counter = 0;)
+ // Add the current frame, then subtract and reset the next frame
+ GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
+ GLSL( total_max += frame_max[frame_idx] - frame_max[next];)
+ GLSL( total_sum += frame_sum[frame_idx] - frame_sum[next];)
+ GLSL( frame_max[next] = frame_sum[next] = 0;)
+ // Update the index and count
+ GLSL( frame_idx = next;)
+ GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
+ GLSL(})
}
// Tone map from a known peak brightness to the range [0,1]. If ref_peak
// is 0, we will use peak detection instead
-static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
+static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
+ float src_peak, float dst_range,
enum tone_mapping algo, float param, float desat)
{
GLSLF("// HDR tone mapping\n");
@@ -568,6 +618,16 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// sure to reduce the value range as far as necessary to keep the entire
// signal in range, so tone map based on the brightest component.
GLSL(float sig = max(max(color.r, color.g), color.b);)
+ GLSLF("float sig_peak = %f;\n", src_peak);
+ GLSLF("float sig_avg = %f;\n", sdr_avg);
+
+ // Rescale the variables in order to bring them into a representation where
+ // 1.0 represents the destination peak (dst_range). This is because all of the
+ // tone mapping algorithms are defined to map to the range [0.0, 1.0].
+ if (dst_range > 1.0) {
+ GLSLF("sig *= %f;\n", 1.0 / dst_range);
+ GLSLF("sig_peak *= %f;\n", 1.0 / dst_range);
+ }
// Desaturate the color using a coefficient dependent on the signal
if (desat > 0) {
@@ -578,41 +638,14 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig`
}
- if (!ref_peak) {
- // For performance, we want to do as few atomic operations on global
- // memory as possible, so use an atomic in shmem for the work group.
- // We also want slightly more stable values, so use the group average
- // instead of the group max
- GLSLHF("shared uint group_sum = 0;\n");
- GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE);
-
- // Have one thread in each work group update the frame maximum
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSL(if (gl_LocalInvocationIndex == 0))
- GLSL(atomicMax(frame_max[index], group_sum /
- (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
-
- // Finally, have one thread per invocation update the total maximum
- // and advance the index
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
- GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
- GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
- GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
- GLSL(index = next;)
- GLSL(})
-
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
- GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
- MP_REF_WHITE * PEAK_DETECT_FRAMES);
- } else {
- GLSLHF("const float sig_peak = %f;\n", ref_peak);
- }
+ if (detect_peak)
+ hdr_update_peak(sc);
GLSL(float sig_orig = sig;)
+ GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg);
+ GLSL(sig *= slope;)
+ GLSL(sig_peak *= slope;)
+
switch (algo) {
case TONE_MAPPING_CLIP:
GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
@@ -668,6 +701,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// Apply the computed scale factor to the color, linearly to prevent
// discoloration
+ GLSL(sig = min(sig, 1.0);)
GLSL(color.rgb *= sig / sig_orig;)
}
@@ -689,7 +723,6 @@ void pass_color_map(struct gl_shader_cache *sc,
// Compute the highest encodable level
float src_range = mp_trc_nom_peak(src.gamma),
dst_range = mp_trc_nom_peak(dst.gamma);
- float ref_peak = src.sig_peak / dst_range;
// Some operations need access to the video's luma coefficients, so make
// them available
@@ -709,20 +742,13 @@ void pass_color_map(struct gl_shader_cache *sc,
src.light != dst.light;
if (need_gamma && !is_linear) {
+ // We also pull it up so that 1.0 is the reference white
pass_linearize(sc, src.gamma);
- is_linear= true;
+ is_linear = true;
}
if (src.light != dst.light)
- pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma));
-
- // Rescale the signal to compensate for differences in the encoding range
- // and reference white level. This is necessary because of how mpv encodes
- // brightness in textures.
- if (src_range != dst_range) {
- GLSLF("// rescale value range;\n");
- GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range);
- }
+ pass_ootf(sc, src.light, src_range);
// Adapt to the right colorspace if necessary
if (src.primaries != dst.primaries) {
@@ -734,18 +760,20 @@ void pass_color_map(struct gl_shader_cache *sc,
GLSL(color.rgb = cms_matrix * color.rgb;)
// Since this can reduce the gamut, figure out by how much
for (int c = 0; c < 3; c++)
- ref_peak = MPMAX(ref_peak, m[c][c]);
+ src.sig_peak = MPMAX(src.sig_peak, m[c][c]);
}
// Tone map to prevent clipping when the source signal peak exceeds the
// encodable range or we've reduced the gamut
- if (ref_peak > 1) {
- pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo,
+ if (src.sig_peak > dst_range) {
+ GLSLF("color.rgb *= vec3(%f);\n", src_range);
+ pass_tone_map(sc, detect_peak, src.sig_peak, dst_range, algo,
tone_mapping_param, tone_mapping_desat);
+ GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
}
if (src.light != dst.light)
- pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma));
+ pass_inverse_ootf(sc, dst.light, dst_range);
// Warn for remaining out-of-gamut colors is enabled
if (gamut_warning) {