summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2019-01-01 07:30:00 +0100
committerJan Ekström <jeebjp@gmail.com>2019-02-18 01:54:06 +0200
commit6179dcbb798aa9e3501af82ae46975e881d80626 (patch)
tree4ec798500982b8a3bf39bb4a03fe242cca5555d1
parent3fe882d4ae80fa060a71dad0d6d1605afcfe98b6 (diff)
downloadmpv-6179dcbb798aa9e3501af82ae46975e881d80626.tar.bz2
mpv-6179dcbb798aa9e3501af82ae46975e881d80626.tar.xz
vo_gpu: redesign peak detection algorithm
The previous approach of using an FIR with tunable hard threshold for scene changes had several problems:

- the FIR involved annoying hard-coded buffer sizes, high VRAM usage, and the FIR sum was prone to numerical overflow which limited the number of frames we could average over. We also totally redesign the scene change detection.

- the hard scene change detection was prone to both false positives and false negatives, each with their own (annoying) issues.

Scrap this entirely and switch to a dual approach of using a simple single-pole IIR low pass filter to smooth out noise, while using a softer scene change curve (with tunable low and high thresholds), based on `smoothstep`. The IIR filter is extremely simple in its implementation and has an arbitrarily user-tunable cutoff frequency, while the smoothstep-based scene change curve provides a good, tunable tradeoff between adaptation speed and stability - without exhibiting either of the traditional issues associated with the hard cutoff.

Another way to think about the new options is that the "low threshold" provides a margin of error within which we don't care about small fluctuations in the scene (which will therefore be smoothed out by the IIR filter).
-rw-r--r--DOCS/interface-changes.rst1
-rw-r--r--DOCS/man/options.rst24
-rw-r--r--video/out/gpu/video.c36
-rw-r--r--video/out/gpu/video.h6
-rw-r--r--video/out/gpu/video_shaders.c96
5 files changed, 86 insertions, 77 deletions
diff --git a/DOCS/interface-changes.rst b/DOCS/interface-changes.rst
index 7e723b9dbe..ce7e33176a 100644
--- a/DOCS/interface-changes.rst
+++ b/DOCS/interface-changes.rst
@@ -51,6 +51,7 @@ Interface changes
only using a single value (which previously just controlled the exponent).
The strength now linearly blends between the linear and nonlinear tone
mapped versions of a color.
+ - add --hdr-peak-decay-rate and --hdr-scene-threshold-low/high
--- mpv 0.29.0 ---
- drop --opensles-sample-rate, as --audio-samplerate should be used if desired
- drop deprecated --videotoolbox-format, --ff-aid, --ff-vid, --ff-sid,
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 1c08917d7a..0f7007bf89 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -5245,6 +5245,30 @@ The following video options are currently all specific to ``--vo=gpu`` and
The special value ``auto`` (default) will enable HDR peak computation
automatically if compute shaders and SSBOs are supported.
+``--hdr-peak-decay-rate=<1.0..1000.0>``
+ The decay rate used for the HDR peak detection algorithm (default: 100.0).
+ This is only relevant when ``--hdr-compute-peak`` is enabled. Higher values
+ make the peak decay more slowly, leading to more stable values at the cost
+ of more "eye adaptation"-like effects (although this is mitigated somewhat
+ by ``--hdr-scene-threshold-low`` and ``--hdr-scene-threshold-high``). A value of 1.0 (the lowest possible) disables
+ most of the averaging, meaning each frame's measured value dominates the result,
+ but doing this is not recommended for "noisy" sources since it may lead
+ to excessive flicker. (In signal theory terms, this controls the time
+ constant "tau" of an IIR low pass filter, measured in frames.)
+
+``--hdr-scene-threshold-low=<0..10000>``, ``--hdr-scene-threshold-high=<0..10000>``
+ The lower and upper thresholds (in cd/m^2) for a brightness difference to
+ be considered a scene change (default: 50 low, 200 high). This is only
+ relevant when ``--hdr-compute-peak`` is enabled. Normally, small
+ fluctuations in the frame brightness are compensated for by the peak
+ averaging mechanism, but for large jumps in the brightness this can result
+ in the frame remaining too bright or too dark for up to several seconds,
+ depending on the value of ``--hdr-peak-decay-rate``. To counteract this,
+ when the brightness difference between the running average and the current frame
+ exceeds the low threshold, mpv will make the averaging filter more
+ aggressive, up to the limit of the high threshold (at which point the
+ filter becomes instant).
+
``--tone-mapping-desaturate=<0.0..1.0>``
Apply desaturation for highlights (default: 0.75). The parameter controls
the strength of the desaturation curve. A value of 0.0 completely disables
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 9ffdc62d20..a29f09bc3d 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = {
.tone_map = {
.curve = TONE_MAPPING_HABLE,
.curve_param = NAN,
+ .decay_rate = 100.0,
+ .scene_threshold_low = 50,
+ .scene_threshold_high = 200,
.desat = 0.75,
.desat_exp = 1.5,
},
@@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = {
({"auto", 0},
{"yes", 1},
{"no", -1})),
+ OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
+ OPT_INTRANGE("hdr-scene-threshold-low",
+ tone_map.scene_threshold_low, 0, 0, 10000),
+ OPT_INTRANGE("hdr-scene-threshold-high",
+ tone_map.scene_threshold_high, 0, 0, 10000),
OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
@@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.sig_peak = mp_trc_nom_peak(dst.gamma);
struct gl_tone_map_opts tone_map = p->opts.tone_map;
- bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma);
+ bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
+ && src.sig_peak > dst.sig_peak;
+
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
+ float average[2];
+ uint32_t frame_sum;
+ uint32_t frame_max;
uint32_t counter;
- uint32_t frame_idx;
- uint32_t frame_num;
- uint32_t frame_max[PEAK_DETECT_FRAMES+1];
- uint32_t frame_sum[PEAK_DETECT_FRAMES+1];
- uint32_t total_max;
- uint32_t total_sum;
- } peak_ssbo = {0};
+ } peak_ssbo = {
+ .average = { 0.25, src.sig_peak },
+ };
struct ra_buf_params params = {
.type = RA_BUF_TYPE_SHADER_STORAGE,
@@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_describe(p, "detect HDR peak");
pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
+ "vec2 average;"
+ "uint frame_sum;"
+ "uint frame_max;"
"uint counter;"
- "uint frame_idx;"
- "uint frame_num;"
- "uint frame_max[%d];"
- "uint frame_avg[%d];"
- "uint total_max;"
- "uint total_avg;",
- PEAK_DETECT_FRAMES + 1,
- PEAK_DETECT_FRAMES + 1
);
}
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index ee5c0a2861..077f69332f 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -95,13 +95,13 @@ enum tone_mapping {
TONE_MAPPING_LINEAR,
};
-// How many frames to average over for HDR peak detection
-#define PEAK_DETECT_FRAMES 63
-
struct gl_tone_map_opts {
int curve;
float curve_param;
int compute_peak;
+ float decay_rate;
+ int scene_threshold_low;
+ int scene_threshold_high;
float desat;
float desat_exp;
int gamut_warning; // bool
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 315e15cc89..0fff8f05f2 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh
// under a typical presentation gamma of about 2.0.
static const float sdr_avg = 0.25;
-// The threshold for which to consider an average luminance difference to be
-// a sign of a scene change.
-static const int scene_threshold = 0.2 * MP_REF_WHITE;
-
-static void hdr_update_peak(struct gl_shader_cache *sc)
+static void hdr_update_peak(struct gl_shader_cache *sc,
+ const struct gl_tone_map_opts *opts)
{
- // For performance, we want to do as few atomic operations on global
- // memory as possible, so use an atomic in shmem for the work group.
- GLSLH(shared uint wg_sum;);
- GLSL(wg_sum = 0;)
-
- // Have each thread update the work group sum with the local value
+ // Update the sig_peak/sig_avg from the old SSBO state
+ GLSL(sig_avg = max(1e-3, average.x);)
+ GLSL(sig_peak = max(1.00, average.y);)
+
+ // For performance, and to avoid overflows, we tally up the sub-results per
+ // pixel using shared memory first
+ GLSLH(shared uint wg_sum;)
+ GLSLH(shared uint wg_max;)
+ GLSL(wg_sum = wg_max = 0;)
GLSL(barrier();)
- GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE);
+ GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
+ GLSL(atomicAdd(wg_sum, sig_uint);)
+ GLSL(atomicMax(wg_max, sig_uint);)
- // Have one thread per work group update the global atomics. We use the
- // work group average even for the global sum, to make the values slightly
- // more stable and smooth out tiny super-highlights.
+ // Have one thread per work group update the global atomics
GLSL(memoryBarrierShared();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0) {)
GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
- GLSL( atomicMax(frame_max[frame_idx], wg_avg);)
- GLSL( atomicAdd(frame_avg[frame_idx], wg_avg);)
+ GLSL( atomicAdd(frame_sum, wg_avg);)
+ GLSL( atomicMax(frame_max, wg_max);)
+ GLSL( memoryBarrierBuffer();)
GLSL(})
-
- const float refi = 1.0 / MP_REF_WHITE;
-
- // Update the sig_peak/sig_avg from the old SSBO state
- GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
- GLSL(if (frame_num > 0) {)
- GLSLF(" float peak = %f * float(total_max) / float(frame_num);\n", refi);
- GLSLF(" float avg = %f * float(total_avg) / float(frame_num);\n", refi);
- GLSLF(" sig_peak = max(1.0, peak);\n");
- GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg);
- GLSL(});
+ GLSL(barrier();)
// Finally, to update the global state, we increment a counter per dispatch
- GLSL(memoryBarrierBuffer();)
- GLSL(barrier();)
+ GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
-
- // Since we sum up all the workgroups, we also still need to divide the
- // average by the number of work groups
GLSL( counter = 0;)
- GLSL( frame_avg[frame_idx] /= num_wg;)
- GLSL( uint cur_max = frame_max[frame_idx];)
- GLSL( uint cur_avg = frame_avg[frame_idx];)
-
- // Scene change detection
- GLSL( int diff = int(frame_num * cur_avg) - int(total_avg);)
- GLSLF(" if (abs(diff) > frame_num * %d) {\n", scene_threshold);
- GLSL( frame_num = 0;)
- GLSL( total_max = total_avg = 0;)
- GLSLF(" for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1);
- GLSL( frame_max[i] = frame_avg[i] = 0;)
- GLSL( frame_max[frame_idx] = cur_max;)
- GLSL( frame_avg[frame_idx] = cur_avg;)
- GLSL( })
-
- // Add the current frame, then subtract and reset the next frame
- GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
- GLSL( total_max += cur_max - frame_max[next];)
- GLSL( total_avg += cur_avg - frame_avg[next];)
- GLSL( frame_max[next] = frame_avg[next] = 0;)
-
- // Update the index and count
- GLSL( frame_idx = next;)
- GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
+ GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
+ GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE);
+
+ // Use an IIR low-pass filter to smooth out the detected values, with a
+ // configurable decay rate based on the desired time constant (tau)
+ float a = 1.0 - cos(1.0 / opts->decay_rate);
+ float decay = sqrt(a*a + 2*a) - a;
+ GLSLF(" average += %f * (cur - average);\n", decay);
+
+ // Scene change hysteresis
+ GLSLF(" float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n",
+ (float) opts->scene_threshold_low / MP_REF_WHITE,
+ (float) opts->scene_threshold_high / MP_REF_WHITE);
+ GLSL( average = mix(average, cur, weight);)
+
+ // Reset SSBO state for the next frame
+ GLSL( frame_max = frame_sum = 0;)
GLSL( memoryBarrierBuffer();)
GLSL(})
}
@@ -659,7 +639,7 @@ static void pass_tone_map(struct gl_shader_cache *sc,
GLSLF("float sig_avg = %f;\n", sdr_avg);
if (opts->compute_peak >= 0)
- hdr_update_peak(sc);
+ hdr_update_peak(sc, opts);
GLSLF("vec3 sig = color.rgb;\n");