diff options
-rw-r--r-- | DOCS/interface-changes.rst | 1 | ||||
-rw-r--r-- | DOCS/man/options.rst | 24 | ||||
-rw-r--r-- | video/out/gpu/video.c | 36 | ||||
-rw-r--r-- | video/out/gpu/video.h | 6 | ||||
-rw-r--r-- | video/out/gpu/video_shaders.c | 96 |
5 files changed, 86 insertions, 77 deletions
diff --git a/DOCS/interface-changes.rst b/DOCS/interface-changes.rst index 7e723b9dbe..ce7e33176a 100644 --- a/DOCS/interface-changes.rst +++ b/DOCS/interface-changes.rst @@ -51,6 +51,7 @@ Interface changes only using a single value (which previously just controlled the exponent). The strength now linearly blends between the linear and nonlinear tone mapped versions of a color. + - add --hdr-peak-decay-rate and --hdr-scene-threshold-low/high --- mpv 0.29.0 --- - drop --opensles-sample-rate, as --audio-samplerate should be used if desired - drop deprecated --videotoolbox-format, --ff-aid, --ff-vid, --ff-sid, diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 1c08917d7a..0f7007bf89 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -5245,6 +5245,30 @@ The following video options are currently all specific to ``--vo=gpu`` and The special value ``auto`` (default) will enable HDR peak computation automatically if compute shaders and SSBOs are supported. +``--hdr-peak-decay-rate=<1.0..1000.0>`` + The decay rate used for the HDR peak detection algorithm (default: 100.0). + This is only relevant when ``--hdr-compute-peak`` is enabled. Higher values + make the peak decay more slowly, leading to more stable values at the cost + of more "eye adaptation"-like effects (although this is mitigated somewhat + by ``--hdr-scene-threshold-low`` and ``--hdr-scene-threshold-high``). A value of 1.0 (the lowest possible) disables + all averaging, meaning each frame's value is used directly as measured, + but doing this is not recommended for "noisy" sources since it may lead + to excessive flicker. (In signal theory terms, this controls the time + constant "tau" of an IIR low-pass filter.) + +``--hdr-scene-threshold-low=<0..10000>``, ``--hdr-scene-threshold-high=<0..10000>`` + The lower and upper thresholds (in cd/m^2) for a brightness difference to + be considered a scene change (default: 50 low, 200 high). This is only + relevant when ``--hdr-compute-peak`` is enabled. 
Normally, small + fluctuations in the frame brightness are compensated for by the peak + averaging mechanism, but for large jumps in the brightness this can result + in the frame remaining too bright or too dark for up to several seconds, + depending on the value of ``--hdr-peak-decay-rate``. To counteract this, + when the brightness between the running average and the current frame + exceeds the low threshold, mpv will make the averaging filter more + aggressive, up to the limit of the high threshold (at which point the + filter becomes instant). + ``--tone-mapping-desaturate=<0.0..1.0>`` Apply desaturation for highlights (default: 0.75). The parameter controls the strength of the desaturation curve. A value of 0.0 completely disables diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 9ffdc62d20..a29f09bc3d 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = { .tone_map = { .curve = TONE_MAPPING_HABLE, .curve_param = NAN, + .decay_rate = 100.0, + .scene_threshold_low = 50, + .scene_threshold_high = 200, .desat = 0.75, .desat_exp = 1.5, }, @@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = { ({"auto", 0}, {"yes", 1}, {"no", -1})), + OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0), + OPT_INTRANGE("hdr-scene-threshold-low", + tone_map.scene_threshold_low, 0, 0, 10000), + OPT_INTRANGE("hdr-scene-threshold-high", + tone_map.scene_threshold_high, 0, 0, 10000), OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0), OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), OPT_FLOATRANGE("tone-mapping-desaturate-exponent", @@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool dst.sig_peak = mp_trc_nom_peak(dst.gamma); struct gl_tone_map_opts tone_map = p->opts.tone_map; - bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma); + bool detect_peak = 
tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma) + && src.sig_peak > dst.sig_peak; + if (detect_peak && !p->hdr_peak_ssbo) { struct { + float average[2]; + uint32_t frame_sum; + uint32_t frame_max; uint32_t counter; - uint32_t frame_idx; - uint32_t frame_num; - uint32_t frame_max[PEAK_DETECT_FRAMES+1]; - uint32_t frame_sum[PEAK_DETECT_FRAMES+1]; - uint32_t total_max; - uint32_t total_sum; - } peak_ssbo = {0}; + } peak_ssbo = { + .average = { 0.25, src.sig_peak }, + }; struct ra_buf_params params = { .type = RA_BUF_TYPE_SHADER_STORAGE, @@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool pass_describe(p, "detect HDR peak"); pass_is_compute(p, 8, 8, true); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, + "vec2 average;" + "uint frame_sum;" + "uint frame_max;" "uint counter;" - "uint frame_idx;" - "uint frame_num;" - "uint frame_max[%d];" - "uint frame_avg[%d];" - "uint total_max;" - "uint total_avg;", - PEAK_DETECT_FRAMES + 1, - PEAK_DETECT_FRAMES + 1 ); } diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index ee5c0a2861..077f69332f 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -95,13 +95,13 @@ enum tone_mapping { TONE_MAPPING_LINEAR, }; -// How many frames to average over for HDR peak detection -#define PEAK_DETECT_FRAMES 63 - struct gl_tone_map_opts { int curve; float curve_param; int compute_peak; + float decay_rate; + int scene_threshold_low; + int scene_threshold_high; float desat; float desat_exp; int gamut_warning; // bool diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 315e15cc89..0fff8f05f2 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh // under a typical presentation gamma of about 2.0. 
static const float sdr_avg = 0.25; -// The threshold for which to consider an average luminance difference to be -// a sign of a scene change. -static const int scene_threshold = 0.2 * MP_REF_WHITE; - -static void hdr_update_peak(struct gl_shader_cache *sc) +static void hdr_update_peak(struct gl_shader_cache *sc, + const struct gl_tone_map_opts *opts) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. - GLSLH(shared uint wg_sum;); - GLSL(wg_sum = 0;) - - // Have each thread update the work group sum with the local value + // Update the sig_peak/sig_avg from the old SSBO state + GLSL(sig_avg = max(1e-3, average.x);) + GLSL(sig_peak = max(1.00, average.y);) + + // For performance, and to avoid overflows, we tally up the sub-results per + // pixel using shared memory first + GLSLH(shared uint wg_sum;) + GLSLH(shared uint wg_max;) + GLSL(wg_sum = wg_max = 0;) GLSL(barrier();) - GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE); + GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE); + GLSL(atomicAdd(wg_sum, sig_uint);) + GLSL(atomicMax(wg_max, sig_uint);) - // Have one thread per work group update the global atomics. We use the - // work group average even for the global sum, to make the values slightly - // more stable and smooth out tiny super-highlights. 
+ // Have one thread per work group update the global atomics GLSL(memoryBarrierShared();) GLSL(barrier();) GLSL(if (gl_LocalInvocationIndex == 0) {) GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);) - GLSL( atomicMax(frame_max[frame_idx], wg_avg);) - GLSL( atomicAdd(frame_avg[frame_idx], wg_avg);) + GLSL( atomicAdd(frame_sum, wg_avg);) + GLSL( atomicMax(frame_max, wg_max);) + GLSL( memoryBarrierBuffer();) GLSL(}) - - const float refi = 1.0 / MP_REF_WHITE; - - // Update the sig_peak/sig_avg from the old SSBO state - GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) - GLSL(if (frame_num > 0) {) - GLSLF(" float peak = %f * float(total_max) / float(frame_num);\n", refi); - GLSLF(" float avg = %f * float(total_avg) / float(frame_num);\n", refi); - GLSLF(" sig_peak = max(1.0, peak);\n"); - GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg); - GLSL(}); + GLSL(barrier();) // Finally, to update the global state, we increment a counter per dispatch - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) + GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) - - // Since we sum up all the workgroups, we also still need to divide the - // average by the number of work groups GLSL( counter = 0;) - GLSL( frame_avg[frame_idx] /= num_wg;) - GLSL( uint cur_max = frame_max[frame_idx];) - GLSL( uint cur_avg = frame_avg[frame_idx];) - - // Scene change detection - GLSL( int diff = int(frame_num * cur_avg) - int(total_avg);) - GLSLF(" if (abs(diff) > frame_num * %d) {\n", scene_threshold); - GLSL( frame_num = 0;) - GLSL( total_max = total_avg = 0;) - GLSLF(" for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1); - GLSL( frame_max[i] = frame_avg[i] = 0;) - GLSL( frame_max[frame_idx] = cur_max;) - GLSL( frame_avg[frame_idx] = cur_avg;) - GLSL( }) - - // Add the current frame, then subtract and reset the next frame - GLSLF(" uint next = (frame_idx + 1) %% %d;\n", 
PEAK_DETECT_FRAMES+1); - GLSL( total_max += cur_max - frame_max[next];) - GLSL( total_avg += cur_avg - frame_avg[next];) - GLSL( frame_max[next] = frame_avg[next] = 0;) - - // Update the index and count - GLSL( frame_idx = next;) - GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES); + GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) + GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE); + + // Use an IIR low-pass filter to smooth out the detected values, with a + // configurable decay rate based on the desired time constant (tau) + float a = 1.0 - cos(1.0 / opts->decay_rate); + float decay = sqrt(a*a + 2*a) - a; + GLSLF(" average += %f * (cur - average);\n", decay); + + // Scene change hysteresis + GLSLF(" float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n", + (float) opts->scene_threshold_low / MP_REF_WHITE, + (float) opts->scene_threshold_high / MP_REF_WHITE); + GLSL( average = mix(average, cur, weight);) + + // Reset SSBO state for the next frame + GLSL( frame_max = frame_sum = 0;) GLSL( memoryBarrierBuffer();) GLSL(}) } @@ -659,7 +639,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, GLSLF("float sig_avg = %f;\n", sdr_avg); if (opts->compute_peak >= 0) - hdr_update_peak(sc); + hdr_update_peak(sc, opts); GLSLF("vec3 sig = color.rgb;\n"); |