summary | refs | log | tree | commit | diff | stats
path: root/video/out/gpu/video_shaders.c
diff options
context:
space:
mode:
author Niklas Haas <git@haasn.xyz> 2019-01-02 07:18:29 +0100
committer Jan Ekström <jeebjp@gmail.com> 2019-02-18 01:54:06 +0200
commit fdd671188d7edb8d150ec2c93656fb80bf031f12 (patch)
tree 4fc415e7cf8cfbf3de5891f5685a1c875bf9cde0 /video/out/gpu/video_shaders.c
parent 12e58ff8a65c537a222a3fb954f88d98a3a5bfd2 (diff)
download mpv-fdd671188d7edb8d150ec2c93656fb80bf031f12.tar.bz2
mpv-fdd671188d7edb8d150ec2c93656fb80bf031f12.tar.xz
vo_gpu: improve accuracy of HDR brightness estimation
This change switches to a logarithmic mean to estimate the average signal brightness. This handles dark scenes with isolated highlights much more faithfully than the linear mean did, since the log of the signal roughly corresponds to the perceptual brightness.
Diffstat (limited to 'video/out/gpu/video_shaders.c')
-rw-r--r-- video/out/gpu/video_shaders.c 20
1 files changed, 12 insertions, 8 deletions
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index fbccd56eb3..127db58ea2 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -574,21 +574,24 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL(sig_avg = max(1e-3, average.x);)
GLSL(sig_peak = max(1.00, average.y);)
+ // Chosen to avoid overflowing on an 8K buffer
+ const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
+
// For performance, and to avoid overflows, we tally up the sub-results per
// pixel using shared memory first
- GLSLH(shared uint wg_sum;)
+ GLSLH(shared int wg_sum;)
GLSLH(shared uint wg_max;)
- GLSL(wg_sum = wg_max = 0;)
+ GLSL(wg_sum = 0; wg_max = 0;)
GLSL(barrier();)
- GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
- GLSL(atomicAdd(wg_sum, sig_uint);)
- GLSL(atomicMax(wg_max, sig_uint);)
+ GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min);
+ GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale);
+ GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale);
// Have one thread per work group update the global atomics
GLSL(memoryBarrierShared();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0) {)
- GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+ GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
GLSL( atomicAdd(frame_sum, wg_avg);)
GLSL( atomicMax(frame_max, wg_max);)
GLSL( memoryBarrierBuffer();)
@@ -600,7 +603,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
GLSL( counter = 0;)
GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
- GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE);
+ GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
+ GLSL( cur.x = exp(cur.x);)
// Use an IIR low-pass filter to smooth out the detected values, with a
// configurable decay rate based on the desired time constant (tau)
@@ -615,7 +619,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
GLSL( average = mix(average, cur, weight);)
// Reset SSBO state for the next frame
- GLSL( frame_max = frame_sum = 0;)
+ GLSL( frame_sum = 0; frame_max = 0;)
GLSL( memoryBarrierBuffer();)
GLSL(})
}