summary | refs | log | tree | commit | diff | stats
path: root/video/out
diff options
context:
space:
mode:
author Niklas Haas <git@haasn.xyz> 2019-01-02 07:18:29 +0100
committer Jan Ekström <jeebjp@gmail.com> 2019-02-18 01:54:06 +0200
commit fdd671188d7edb8d150ec2c93656fb80bf031f12 (patch)
tree 4fc415e7cf8cfbf3de5891f5685a1c875bf9cde0 /video/out
parent 12e58ff8a65c537a222a3fb954f88d98a3a5bfd2 (diff)
download mpv-fdd671188d7edb8d150ec2c93656fb80bf031f12.tar.bz2
mpv-fdd671188d7edb8d150ec2c93656fb80bf031f12.tar.xz
vo_gpu: improve accuracy of HDR brightness estimation
This change switches to a logarithmic mean to estimate the average signal brightness. This handles dark scenes with isolated highlights much more faithfully than the linear mean did, since the log of the signal roughly corresponds to the perceptual brightness.
Diffstat (limited to 'video/out')
-rw-r--r-- video/out/gpu/video.c 4
-rw-r--r-- video/out/gpu/video_shaders.c 20
2 files changed, 14 insertions, 10 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 6bf0bb31a1..be49551dfb 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2494,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
float average[2];
- uint32_t frame_sum;
+ int32_t frame_sum;
uint32_t frame_max;
uint32_t counter;
} peak_ssbo = {
@@ -2520,7 +2520,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"vec2 average;"
- "uint frame_sum;"
+ "int frame_sum;"
"uint frame_max;"
"uint counter;"
);
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index fbccd56eb3..127db58ea2 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -574,21 +574,24 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL(sig_avg = max(1e-3, average.x);)
GLSL(sig_peak = max(1.00, average.y);)
+ // Chosen to avoid overflowing on an 8K buffer
+ const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
+
// For performance, and to avoid overflows, we tally up the sub-results per
// pixel using shared memory first
- GLSLH(shared uint wg_sum;)
+ GLSLH(shared int wg_sum;)
GLSLH(shared uint wg_max;)
- GLSL(wg_sum = wg_max = 0;)
+ GLSL(wg_sum = 0; wg_max = 0;)
GLSL(barrier();)
- GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
- GLSL(atomicAdd(wg_sum, sig_uint);)
- GLSL(atomicMax(wg_max, sig_uint);)
+ GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min);
+ GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale);
+ GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale);
// Have one thread per work group update the global atomics
GLSL(memoryBarrierShared();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0) {)
- GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+ GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
GLSL( atomicAdd(frame_sum, wg_avg);)
GLSL( atomicMax(frame_max, wg_max);)
GLSL( memoryBarrierBuffer();)
@@ -600,7 +603,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
GLSL( counter = 0;)
GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
- GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE);
+ GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
+ GLSL( cur.x = exp(cur.x);)
// Use an IIR low-pass filter to smooth out the detected values, with a
// configurable decay rate based on the desired time constant (tau)
@@ -615,7 +619,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL( average = mix(average, cur, weight);)
// Reset SSBO state for the next frame
- GLSL( frame_max = frame_sum = 0;)
+ GLSL( frame_sum = 0; frame_max = 0;)
GLSL( memoryBarrierBuffer();)
GLSL(})
}