From 08679f756cf0387d883247cd7e4e8b24ca6543f5 Mon Sep 17 00:00:00 2001 From: Akemi Date: Sun, 16 Dec 2018 13:07:11 +0100 Subject: vd_lavc: increase the possible length of the hwdec name this led to an unexpected videotoolbox-copy hwdec name due to the last two chars being cut off. since selection is also done by that name one had to use "videotoolbox-co" to explicitly use the copy mode of videotoolbox. --- video/decode/vd_lavc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'video') diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c index 25049aa341..cce3b4510b 100644 --- a/video/decode/vd_lavc.c +++ b/video/decode/vd_lavc.c @@ -132,7 +132,7 @@ const struct m_sub_options vd_lavc_conf = { struct hwdec_info { char name[64]; - char method_name[16]; // non-unique name describing the hwdec method + char method_name[24]; // non-unique name describing the hwdec method const AVCodec *codec; // implemented by this codec enum AVHWDeviceType lavc_device; // if not NONE, get a hwdevice bool copying; // if true, outputs sw frames, or copy to sw ourselves -- cgit v1.2.3 From 05f0980b9610c3d0f75f8004578ae61d3e3145e4 Mon Sep 17 00:00:00 2001 From: Kotori Itsuka Date: Thu, 17 Jan 2019 20:20:37 +1000 Subject: vo_gpu: allow resetting target-peak to the trc default Add "auto" to the possible values of target-peak. The default value for target_peak is to calculate the target using mp_trc_nom_peak. Unfortunately, this default was outside the acceptable range of 10-10000 nits, which prevented its later reassignment. So add an "auto" choice to target-peak which lets clients and scripts go back to using the trc default after assigning a value. 
--- video/out/gpu/video.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index b0fa9eb4d9..c12fb8536c 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -351,7 +351,8 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("gamma-auto", gamma_auto, 0), OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), - OPT_INTRANGE("target-peak", target_peak, 0, 10, 10000), + OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000, + ({"auto", 0})), OPT_CHOICE("tone-mapping", tone_mapping, 0, ({"clip", TONE_MAPPING_CLIP}, {"mobius", TONE_MAPPING_MOBIUS}, -- cgit v1.2.3 From a4298b1a6984319f0d4fe6cc4ab21164d8ae423c Mon Sep 17 00:00:00 2001 From: Akemi Date: Sat, 19 Jan 2019 11:43:31 +0100 Subject: cocoa-cb: fix side by side Split View again some safety mechanisms for the async fs animation aren't needed anymore, due to possibly improved logic and slightly different behaviour on new macOS versions. that safety fallback prevented the Split View because it always returned a rectangle of the whole screen, instead of just part/half of it. 
Fixes #6443 --- video/out/cocoa-cb/window.swift | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'video') diff --git a/video/out/cocoa-cb/window.swift b/video/out/cocoa-cb/window.swift index d11706f38b..ad93a9cebd 100644 --- a/video/out/cocoa-cb/window.swift +++ b/video/out/cocoa-cb/window.swift @@ -300,6 +300,7 @@ class Window: NSWindow, NSWindowDelegate { let intermediateFrame = aspectFit(rect: newFrame, in: screen!.frame) cocoaCB.view.layerContentsPlacement = .scaleProportionallyToFill hideTitleBar() + styleMask.remove(.fullScreen) setFrame(intermediateFrame, display: true) NSAnimationContext.runAnimationGroup({ (context) -> Void in @@ -435,9 +436,7 @@ class Window: NSWindow, NSWindowDelegate { } override func setFrame(_ frameRect: NSRect, display flag: Bool) { - let newFrame = !isAnimating && isInFullscreen ? targetScreen!.frame : - frameRect - super.setFrame(newFrame, display: flag) + super.setFrame(frameRect, display: flag) if keepAspect { contentAspectRatio = unfsContentFrame!.size -- cgit v1.2.3 From ace61c120f18733f3cfc88273fdbad6fb1db5bc6 Mon Sep 17 00:00:00 2001 From: Akemi Date: Sun, 30 Sep 2018 11:36:14 +0200 Subject: cocoa-cb: use Swift Extensions for convenience preparations for the following commit. --- video/out/cocoa_cb_common.swift | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'video') diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift index 355fa537e1..41a571d1d0 100644 --- a/video/out/cocoa_cb_common.swift +++ b/video/out/cocoa_cb_common.swift @@ -148,10 +148,9 @@ class CocoaCB: NSObject { func startDisplayLink(_ vo: UnsafeMutablePointer) { let opts: mp_vo_opts = vo.pointee.opts.pointee let screen = getScreenBy(id: Int(opts.screen_id)) ?? NSScreen.main() - let displayId = screen!.deviceDescription["NSScreenNumber"] as! 
UInt32 CVDisplayLinkCreateWithActiveCGDisplays(&link) - CVDisplayLinkSetCurrentCGDisplay(link!, displayId) + CVDisplayLinkSetCurrentCGDisplay(link!, screen!.displayID) if #available(macOS 10.12, *) { CVDisplayLinkSetOutputHandler(link!) { link, now, out, inFlags, outFlags -> CVReturn in self.mpv.reportRenderFlip() @@ -170,8 +169,7 @@ class CocoaCB: NSObject { } func updateDisplaylink() { - let displayId = UInt32(window.screen!.deviceDescription["NSScreenNumber"] as! Int) - CVDisplayLinkSetCurrentCGDisplay(link!, displayId) + CVDisplayLinkSetCurrentCGDisplay(link!, window.screen!.displayID) queue.asyncAfter(deadline: DispatchTime.now() + 0.1) { self.flagEvents(VO_EVENT_WIN_STATE) @@ -302,9 +300,8 @@ class CocoaCB: NSObject { var reconfigureCallback: CGDisplayReconfigurationCallBack = { (display, flags, userInfo) in if flags.contains(.setModeFlag) { let ccb: CocoaCB = MPVHelper.bridge(ptr: userInfo!) - let displayID = (ccb.window.screen!.deviceDescription["NSScreenNumber"] as! NSNumber).intValue - if UInt32(displayID) == display { - ccb.mpv.sendVerbose("Detected display mode change, updating screen refresh rate\n"); + if ccb.window.screen!.displayID == display { + ccb.mpv.sendVerbose("Detected display mode change, updating screen refresh rate"); ccb.flagEvents(VO_EVENT_WIN_STATE) } } -- cgit v1.2.3 From 6ce570359aa06469d3ead822227058ec87c86b30 Mon Sep 17 00:00:00 2001 From: Akemi Date: Wed, 26 Sep 2018 15:33:34 +0200 Subject: cocoa-cb: add support for VOCTRL_GET_DISPLAY_NAMES --- video/out/cocoa_cb_common.swift | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'video') diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift index 41a571d1d0..ae79144d97 100644 --- a/video/out/cocoa_cb_common.swift +++ b/video/out/cocoa_cb_common.swift @@ -420,6 +420,20 @@ class CocoaCB: NSObject { let minimized = data!.assumingMemoryBound(to: Int32.self) minimized.pointee = ccb.window.isMiniaturized ? 
 VO_WIN_STATE_MINIMIZED : Int32(0) return VO_TRUE + case VOCTRL_GET_DISPLAY_NAMES: + let opts: mp_vo_opts = vo!.pointee.opts!.pointee + let dnames = data!.assumingMemoryBound(to: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>?.self) + var array: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>? = nil + var count: Int32 = 0 + let screen = ccb.window != nil ? ccb.window.screen : + ccb.getScreenBy(id: Int(opts.screen_id)) ?? + NSScreen.main() + let displayName = screen?.displayName ?? "Unknown" + + SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, ta_xstrdup(nil, displayName)) + SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, nil) + dnames.pointee = array + return VO_TRUE case VOCTRL_UPDATE_WINDOW_TITLE: let titleData = data!.assumingMemoryBound(to: Int8.self) let title = String(cString: titleData) -- cgit v1.2.3 From 777a863bb6c3829dad617154ba3574e9592b85f9 Mon Sep 17 00:00:00 2001 From: Akemi Date: Thu, 25 Oct 2018 19:39:59 +0200 Subject: cocoa-cb: remove empty elements from dropped URLs Fixes #6241 --- video/out/cocoa-cb/events_view.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'video') diff --git a/video/out/cocoa-cb/events_view.swift b/video/out/cocoa-cb/events_view.swift index 5a84d27b2b..667366285e 100644 --- a/video/out/cocoa-cb/events_view.swift +++ b/video/out/cocoa-cb/events_view.swift @@ -75,7 +75,8 @@ class EventsView: NSView { return true } } else if types.contains(NSURLPboardType) { - if let url = pb.propertyList(forType: NSURLPboardType) as? [Any] { + if var url = pb.propertyList(forType: NSURLPboardType) as? 
[String] { + url = url.filter{ !$0.isEmpty } EventsResponder.sharedInstance().handleFilesArray(url) return true } -- cgit v1.2.3 From 36600ff1633871a996fe05b8e0ff0b22c2ebb0f9 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Fri, 15 Feb 2019 12:52:15 +0100 Subject: =?UTF-8?q?wayland=5Fcommon:=20rename=20=E2=80=9Cshell=E2=80=9D=20?= =?UTF-8?q?into=20=E2=80=9Cwm=5Fbase=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the naming xdg-shell stable adopted, it doesn’t make much sense to keep using “shell” everywhere with all functions calling it “wm_base”. Finishes what 76211609e3c589dafe3ef9a36cacc06e8f56de09 started. --- video/out/wayland_common.c | 20 ++++++++++---------- video/out/wayland_common.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'video') diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c index 45071c8f35..a929c2619e 100644 --- a/video/out/wayland_common.c +++ b/video/out/wayland_common.c @@ -35,13 +35,13 @@ // Generated from xdg-decoration-unstable-v1.xml #include "video/out/wayland/xdg-decoration-v1.h" -static void xdg_shell_ping(void *data, struct xdg_wm_base *shell, uint32_t serial) +static void xdg_wm_base_ping(void *data, struct xdg_wm_base *wm_base, uint32_t serial) { - xdg_wm_base_pong(shell, serial); + xdg_wm_base_pong(wm_base, serial); } -static const struct xdg_wm_base_listener xdg_shell_listener = { - xdg_shell_ping, +static const struct xdg_wm_base_listener xdg_wm_base_listener = { + xdg_wm_base_ping, }; static int spawn_cursor(struct vo_wayland_state *wl) @@ -806,8 +806,8 @@ static void registry_handle_add(void *data, struct wl_registry *reg, uint32_t id if (!strcmp(interface, xdg_wm_base_interface.name) && found++) { ver = MPMIN(ver, 2); /* We can use either 1 or 2 */ - wl->shell = wl_registry_bind(reg, id, &xdg_wm_base_interface, ver); - xdg_wm_base_add_listener(wl->shell, &xdg_shell_listener, wl); + wl->wm_base = 
wl_registry_bind(reg, id, &xdg_wm_base_interface, ver); + xdg_wm_base_add_listener(wl->wm_base, &xdg_wm_base_listener, wl); } if (!strcmp(interface, wl_seat_interface.name) && found++) { @@ -956,7 +956,7 @@ static const struct xdg_toplevel_listener xdg_toplevel_listener = { static int create_xdg_surface(struct vo_wayland_state *wl) { - wl->xdg_surface = xdg_wm_base_get_xdg_surface(wl->shell, wl->surface); + wl->xdg_surface = xdg_wm_base_get_xdg_surface(wl->wm_base, wl->surface); xdg_surface_add_listener(wl->xdg_surface, &xdg_surface_listener, wl); wl->xdg_toplevel = xdg_surface_get_toplevel(wl->xdg_surface); @@ -1013,7 +1013,7 @@ int vo_wayland_init(struct vo *vo) /* Do a roundtrip to run the registry */ wl_display_roundtrip(wl->display); - if (!wl->shell) { + if (!wl->wm_base) { MP_FATAL(wl, "Compositor doesn't support the required %s protocol!\n", xdg_wm_base_interface.name); return false; @@ -1078,8 +1078,8 @@ void vo_wayland_uninit(struct vo *vo) if (wl->idle_inhibit_manager) zwp_idle_inhibit_manager_v1_destroy(wl->idle_inhibit_manager); - if (wl->shell) - xdg_wm_base_destroy(wl->shell); + if (wl->wm_base) + xdg_wm_base_destroy(wl->wm_base); if (wl->shm) wl_shm_destroy(wl->shm); diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h index b381d7c46f..0648efa09a 100644 --- a/video/out/wayland_common.h +++ b/video/out/wayland_common.h @@ -70,7 +70,7 @@ struct vo_wayland_state { /* Shell */ struct wl_surface *surface; - struct xdg_wm_base *shell; + struct xdg_wm_base *wm_base; struct xdg_toplevel *xdg_toplevel; struct xdg_surface *xdg_surface; struct zxdg_decoration_manager_v1 *xdg_decoration_manager; -- cgit v1.2.3 From 3fe882d4ae80fa060a71dad0d6d1605afcfe98b6 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 27 Dec 2018 18:34:19 +0100 Subject: vo_gpu: improve tone mapping desaturation Instead of desaturating towards luma, we desaturate towards the per-channel tone mapped version. 
This essentially provides a smooth roll-off towards the "hollywood"-style (non-chromatic) tone mapping algorithm, which works better for bright content, while continuing to use the "linear" style (chromatic) tone mapping algorithm for primarily in-gamut content. We also split up the desaturation algorithm into strength and exponent, which allows users to use less aggressive desaturation settings without affecting the overall curve. --- video/out/gpu/video.c | 41 ++++++++--------- video/out/gpu/video.h | 15 ++++--- video/out/gpu/video_shaders.c | 101 ++++++++++++++++++++++-------------------- video/out/gpu/video_shaders.h | 6 +-- 4 files changed, 87 insertions(+), 76 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index c12fb8536c..9ffdc62d20 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -313,9 +313,12 @@ static const struct gl_video_opts gl_video_opts_def = { .alpha_mode = ALPHA_BLEND_TILES, .background = {0, 0, 0, 255}, .gamma = 1.0f, - .tone_mapping = TONE_MAPPING_HABLE, - .tone_mapping_param = NAN, - .tone_mapping_desat = 0.5, + .tone_map = { + .curve = TONE_MAPPING_HABLE, + .curve_param = NAN, + .desat = 0.75, + .desat_exp = 1.5, + }, .early_flush = -1, .hwdec_interop = "auto", }; @@ -353,20 +356,22 @@ const struct m_sub_options gl_video_conf = { OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000, ({"auto", 0})), - OPT_CHOICE("tone-mapping", tone_mapping, 0, + OPT_CHOICE("tone-mapping", tone_map.curve, 0, ({"clip", TONE_MAPPING_CLIP}, {"mobius", TONE_MAPPING_MOBIUS}, {"reinhard", TONE_MAPPING_REINHARD}, {"hable", TONE_MAPPING_HABLE}, {"gamma", TONE_MAPPING_GAMMA}, {"linear", TONE_MAPPING_LINEAR})), - OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0, + OPT_CHOICE("hdr-compute-peak", tone_map.compute_peak, 0, ({"auto", 0}, {"yes", 1}, {"no", -1})), - OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), - 
 OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), - OPT_FLAG("gamut-warning", gamut_warning, 0), + OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0), + OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), + OPT_FLOATRANGE("tone-mapping-desaturate-exponent", + tone_map.desat_exp, 0, 0.0, 20.0), + OPT_FLAG("gamut-warning", tone_map.gamut_warning, 0), OPT_FLAG("opengl-pbo", pbo, 0), SCALER_OPTS("scale", SCALER_SCALE), SCALER_OPTS("dscale", SCALER_DSCALE), @@ -2472,7 +2477,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool if (!dst.sig_peak) dst.sig_peak = mp_trc_nom_peak(dst.gamma); - bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma); + struct gl_tone_map_opts tone_map = p->opts.tone_map; + bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma); if (detect_peak && !p->hdr_peak_ssbo) { struct { uint32_t counter; @@ -2493,8 +2499,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool p->hdr_peak_ssbo = ra_buf_create(ra, &params); if (!p->hdr_peak_ssbo) { MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); + tone_map.compute_peak = p->opts.tone_map.compute_peak = -1; detect_peak = false; - p->opts.compute_hdr_peak = -1; } } @@ -2515,9 +2521,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool } // Adapt from src to dst as necessary - pass_color_map(p->sc, src, dst, p->opts.tone_mapping, - p->opts.tone_mapping_param, p->opts.tone_mapping_desat, - detect_peak, p->opts.gamut_warning, p->use_linear && !osd); + pass_color_map(p->sc, p->use_linear && !osd, src, dst, &tone_map); if (p->use_lut_3d) { gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture); @@ -3583,12 +3587,12 @@ static void check_gl_features(struct gl_video *p) } bool have_compute_peak = have_compute && have_ssbo; - if (!have_compute_peak && p->opts.compute_hdr_peak >= 0) { - int msgl = p->opts.compute_hdr_peak == 1 ? 
MSGL_WARN : MSGL_V; + if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) { + int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V; MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the " "following is not supported: compute shaders=%d, " "SSBO=%d).\n", have_compute, have_ssbo); - p->opts.compute_hdr_peak = -1; + p->opts.tone_map.compute_peak = -1; } p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; @@ -3610,7 +3614,6 @@ static void check_gl_features(struct gl_video *p) .alpha_mode = p->opts.alpha_mode, .use_rectangle = p->opts.use_rectangle, .background = p->opts.background, - .compute_hdr_peak = p->opts.compute_hdr_peak, .dither_algo = p->opts.dither_algo, .dither_depth = p->opts.dither_depth, .dither_size = p->opts.dither_size, @@ -3618,9 +3621,7 @@ static void check_gl_features(struct gl_video *p) .temporal_dither_period = p->opts.temporal_dither_period, .tex_pad_x = p->opts.tex_pad_x, .tex_pad_y = p->opts.tex_pad_y, - .tone_mapping = p->opts.tone_mapping, - .tone_mapping_param = p->opts.tone_mapping_param, - .tone_mapping_desat = p->opts.tone_mapping_desat, + .tone_map = p->opts.tone_map, .early_flush = p->opts.early_flush, .icc_opts = p->opts.icc_opts, .hwdec_interop = p->opts.hwdec_interop, diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index ca8b6f65d4..ee5c0a2861 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -98,6 +98,15 @@ enum tone_mapping { // How many frames to average over for HDR peak detection #define PEAK_DETECT_FRAMES 63 +struct gl_tone_map_opts { + int curve; + float curve_param; + int compute_peak; + float desat; + float desat_exp; + int gamut_warning; // bool +}; + struct gl_video_opts { int dumb_mode; struct scaler_config scaler[4]; @@ -107,11 +116,7 @@ struct gl_video_opts { int target_prim; int target_trc; int target_peak; - int tone_mapping; - int compute_hdr_peak; - float tone_mapping_param; - float tone_mapping_desat; - int gamut_warning; + struct 
gl_tone_map_opts tone_map; int correct_downscaling; int linear_downscaling; int linear_upscaling; diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 342fb39ded..315e15cc89 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -580,7 +580,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc) // Have each thread update the work group sum with the local value GLSL(barrier();) - GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE); + GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE); // Have one thread per work group update the global atomics. We use the // work group average even for the global sum, to make the values slightly @@ -642,48 +642,42 @@ static void hdr_update_peak(struct gl_shader_cache *sc) // Tone map from a known peak brightness to the range [0,1]. If ref_peak // is 0, we will use peak detection instead -static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, +static void pass_tone_map(struct gl_shader_cache *sc, float src_peak, float dst_peak, - enum tone_mapping algo, float param, float desat) + const struct gl_tone_map_opts *opts) { GLSLF("// HDR tone mapping\n"); // To prevent discoloration due to out-of-bounds clipping, we need to make // sure to reduce the value range as far as necessary to keep the entire // signal in range, so tone map based on the brightest component. - GLSL(float sig = max(max(color.r, color.g), color.b);) + GLSL(int sig_idx = 0;) + GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;) + GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;) + GLSL(float sig_max = color[sig_idx];) GLSLF("float sig_peak = %f;\n", src_peak); GLSLF("float sig_avg = %f;\n", sdr_avg); - if (detect_peak) + if (opts->compute_peak >= 0) hdr_update_peak(sc); + GLSLF("vec3 sig = color.rgb;\n"); + // Rescale the variables in order to bring it into a representation where // 1.0 represents the dst_peak. 
This is because all of the tone mapping // algorithms are defined in such a way that they map to the range [0.0, 1.0]. if (dst_peak > 1.0) { - GLSLF("sig *= %f;\n", 1.0 / dst_peak); - GLSLF("sig_peak *= %f;\n", 1.0 / dst_peak); + GLSLF("sig *= 1.0/%f;\n", dst_peak); + GLSLF("sig_peak *= 1.0/%f;\n", dst_peak); } - GLSL(float sig_orig = sig;) + GLSL(float sig_orig = sig[sig_idx];) GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg); GLSL(sig *= slope;) GLSL(sig_peak *= slope;) - // Desaturate the color using a coefficient dependent on the signal. - // Do this after peak detection in order to prevent over-desaturating - // overly bright souces - if (desat > 0) { - float base = 0.18 * dst_peak; - GLSL(float luma = dot(dst_luma, color.rgb);) - GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base); - GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat); - GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);) - GLSL(sig = mix(sig, luma * slope, coeff);) // also make sure to update `sig` - } - - switch (algo) { + float param = opts->curve_param; + switch (opts->curve) { case TONE_MAPPING_CLIP: GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); break; @@ -697,14 +691,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " "max(1e-6, sig_peak - 1.0);\n"); GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); - GLSL(sig = sig > j ? scale * (sig + a) / (sig + b) : sig;) + GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b))," + " greaterThan(sig, vec3(j)));\n"); GLSLF("}\n"); break; case TONE_MAPPING_REINHARD: { float contrast = isnan(param) ? 
0.5 : param, offset = (1.0 - contrast) / contrast; - GLSLF("sig = sig / (sig + %f);\n", offset); + GLSLF("sig = sig / (sig + vec3(%f));\n", offset); GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); GLSL(sig *= scale;) break; @@ -712,19 +707,25 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, case TONE_MAPPING_HABLE: { float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; - GLSLHF("float hable(float x) {\n"); - GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", - A, C*B, D*E, A, B, D*F, E/F); + GLSLHF("vec3 hable(vec3 x) {\n"); + GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / " + " (x * (%f*x + vec3(%f)) + vec3(%f)) " + " - vec3(%f);\n", + A, C*B, D*E, + A, B, D*F, + E/F); GLSLHF("}\n"); - GLSL(sig = hable(sig) / hable(sig_peak);) + GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n"); break; } case TONE_MAPPING_GAMMA: { float gamma = isnan(param) ? 1.8 : param; - GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma); - GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;) - GLSL(sig = sig > cutoff ? 
pow(sig / sig_peak, gamma) : scale * sig;) + GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma); + GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;) + GLSLF("sig = mix(scale * sig," + " pow(sig / sig_peak, vec3(gamma))," + " greaterThan(sig, vec3(cutoff)));\n"); break; } @@ -738,24 +739,32 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, abort(); } - // Apply the computed scale factor to the color, linearly to prevent - // discoloration - GLSL(sig = min(sig, 1.0);) - GLSL(color.rgb *= vec3(sig / sig_orig);) + GLSL(sig = min(sig, vec3(1.0));) + GLSL(vec3 sig_lin = color.rgb * (sig[sig_idx] / sig_orig);) + + // Mix between the per-channel tone mapped and the linear tone mapped + // signal based on the desaturation strength + if (opts->desat > 0) { + float base = 0.18 * dst_peak; + GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / " + " max(sig[sig_idx], 1.0);\n", base); + GLSLF("coeff = %f * pow(coeff, %f);\n", opts->desat, opts->desat_exp); + GLSLF("color.rgb = mix(sig_lin, %f * sig, coeff);\n", dst_peak); + } else { + GLSL(color.rgb = sig_lin;) + } } // Map colors from one source space to another. These source spaces must be // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any // auto-guessing. If is_linear is true, we assume the input has already been -// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will -// detect the peak instead of relying on metadata. Note that this requires -// the caller to have already bound the appropriate SSBO and set up the -// compute shader metadata -void pass_color_map(struct gl_shader_cache *sc, +// linearized (e.g. for linear-scaling). If `opts->compute_peak` is true, we +// will detect the peak instead of relying on metadata. 
Note that this requires +// the caller to have already bound the appropriate SSBO and set up the compute +// shader metadata +void pass_color_map(struct gl_shader_cache *sc, bool is_linear, struct mp_colorspace src, struct mp_colorspace dst, - enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool detect_peak, - bool gamut_warning, bool is_linear) + const struct gl_tone_map_opts *opts) { GLSLF("// color mapping\n"); @@ -803,10 +812,8 @@ void pass_color_map(struct gl_shader_cache *sc, // Tone map to prevent clipping when the source signal peak exceeds the // encodable range or we've reduced the gamut - if (src.sig_peak > dst.sig_peak) { - pass_tone_map(sc, detect_peak, src.sig_peak, dst.sig_peak, algo, - tone_mapping_param, tone_mapping_desat); - } + if (src.sig_peak > dst.sig_peak) + pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts); if (need_ootf) pass_inverse_ootf(sc, dst.light, dst.sig_peak); @@ -821,7 +828,7 @@ void pass_color_map(struct gl_shader_cache *sc, GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); // Warn for remaining out-of-gamut colors is enabled - if (gamut_warning) { + if (opts->gamut_warning) { GLSL(if (any(greaterThan(color.rgb, vec3(1.01))))) GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert } diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h index cd395d6377..f20d643e99 100644 --- a/video/out/gpu/video_shaders.h +++ b/video/out/gpu/video_shaders.h @@ -40,11 +40,9 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); -void pass_color_map(struct gl_shader_cache *sc, +void pass_color_map(struct gl_shader_cache *sc, bool is_linear, struct mp_colorspace src, struct mp_colorspace dst, - enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool use_detected_peak, - bool gamut_warning, bool is_linear); 
+ const struct gl_tone_map_opts *opts); void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, AVLFG *lfg, enum mp_csp_trc trc); -- cgit v1.2.3 From 6179dcbb798aa9e3501af82ae46975e881d80626 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 1 Jan 2019 07:30:00 +0100 Subject: vo_gpu: redesign peak detection algorithm The previous approach of using an FIR with tunable hard threshold for scene changes had several problems: - the FIR involved annoying hard-coded buffer sizes, high VRAM usage, and the FIR sum was prone to numerical overflow which limited the number of frames we could average over. We also totally redesign the scene change detection. - the hard scene change detection was prone to both false positives and false negatives, each with their own (annoying) issues. Scrap this entirely and switch to a dual approach of using a simple single-pole IIR low pass filter to smooth out noise, while using a softer scene change curve (with tunable low and high thresholds), based on `smoothstep`. The IIR filter is extremely simple in its implementation and has an arbitrarily user-tunable cutoff frequency, while the smoothstep-based scene change curve provides a good, tunable tradeoff between adaptation speed and stability - without exhibiting either of the traditional issues associated with the hard cutoff. Another way to think about the new options is that the "low threshold" provides a margin of error within which we don't care about small fluctuations in the scene (which will therefore be smoothed out by the IIR filter). 
--- video/out/gpu/video.c | 36 ++++++++-------- video/out/gpu/video.h | 6 +-- video/out/gpu/video_shaders.c | 96 +++++++++++++++++-------------------------- 3 files changed, 61 insertions(+), 77 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 9ffdc62d20..a29f09bc3d 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = { .tone_map = { .curve = TONE_MAPPING_HABLE, .curve_param = NAN, + .decay_rate = 100.0, + .scene_threshold_low = 50, + .scene_threshold_high = 200, .desat = 0.75, .desat_exp = 1.5, }, @@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = { ({"auto", 0}, {"yes", 1}, {"no", -1})), + OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0), + OPT_INTRANGE("hdr-scene-threshold-low", + tone_map.scene_threshold_low, 0, 0, 10000), + OPT_INTRANGE("hdr-scene-threshold-high", + tone_map.scene_threshold_high, 0, 0, 10000), OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0), OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), OPT_FLOATRANGE("tone-mapping-desaturate-exponent", @@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool dst.sig_peak = mp_trc_nom_peak(dst.gamma); struct gl_tone_map_opts tone_map = p->opts.tone_map; - bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma); + bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma) + && src.sig_peak > dst.sig_peak; + if (detect_peak && !p->hdr_peak_ssbo) { struct { + float average[2]; + uint32_t frame_sum; + uint32_t frame_max; uint32_t counter; - uint32_t frame_idx; - uint32_t frame_num; - uint32_t frame_max[PEAK_DETECT_FRAMES+1]; - uint32_t frame_sum[PEAK_DETECT_FRAMES+1]; - uint32_t total_max; - uint32_t total_sum; - } peak_ssbo = {0}; + } peak_ssbo = { + .average = { 0.25, src.sig_peak }, + }; struct ra_buf_params params = { .type = 
RA_BUF_TYPE_SHADER_STORAGE, @@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool pass_describe(p, "detect HDR peak"); pass_is_compute(p, 8, 8, true); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, + "vec2 average;" + "uint frame_sum;" + "uint frame_max;" "uint counter;" - "uint frame_idx;" - "uint frame_num;" - "uint frame_max[%d];" - "uint frame_avg[%d];" - "uint total_max;" - "uint total_avg;", - PEAK_DETECT_FRAMES + 1, - PEAK_DETECT_FRAMES + 1 ); } diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index ee5c0a2861..077f69332f 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -95,13 +95,13 @@ enum tone_mapping { TONE_MAPPING_LINEAR, }; -// How many frames to average over for HDR peak detection -#define PEAK_DETECT_FRAMES 63 - struct gl_tone_map_opts { int curve; float curve_param; int compute_peak; + float decay_rate; + int scene_threshold_low; + int scene_threshold_high; float desat; float desat_exp; int gamut_warning; // bool diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 315e15cc89..0fff8f05f2 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh // under a typical presentation gamma of about 2.0. static const float sdr_avg = 0.25; -// The threshold for which to consider an average luminance difference to be -// a sign of a scene change. -static const int scene_threshold = 0.2 * MP_REF_WHITE; - -static void hdr_update_peak(struct gl_shader_cache *sc) +static void hdr_update_peak(struct gl_shader_cache *sc, + const struct gl_tone_map_opts *opts) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. 
- GLSLH(shared uint wg_sum;); - GLSL(wg_sum = 0;) - - // Have each thread update the work group sum with the local value + // Update the sig_peak/sig_avg from the old SSBO state + GLSL(sig_avg = max(1e-3, average.x);) + GLSL(sig_peak = max(1.00, average.y);) + + // For performance, and to avoid overflows, we tally up the sub-results per + // pixel using shared memory first + GLSLH(shared uint wg_sum;) + GLSLH(shared uint wg_max;) + GLSL(wg_sum = wg_max = 0;) GLSL(barrier();) - GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE); + GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE); + GLSL(atomicAdd(wg_sum, sig_uint);) + GLSL(atomicMax(wg_max, sig_uint);) - // Have one thread per work group update the global atomics. We use the - // work group average even for the global sum, to make the values slightly - // more stable and smooth out tiny super-highlights. + // Have one thread per work group update the global atomics GLSL(memoryBarrierShared();) GLSL(barrier();) GLSL(if (gl_LocalInvocationIndex == 0) {) GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);) - GLSL( atomicMax(frame_max[frame_idx], wg_avg);) - GLSL( atomicAdd(frame_avg[frame_idx], wg_avg);) + GLSL( atomicAdd(frame_sum, wg_avg);) + GLSL( atomicMax(frame_max, wg_max);) + GLSL( memoryBarrierBuffer();) GLSL(}) - - const float refi = 1.0 / MP_REF_WHITE; - - // Update the sig_peak/sig_avg from the old SSBO state - GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) - GLSL(if (frame_num > 0) {) - GLSLF(" float peak = %f * float(total_max) / float(frame_num);\n", refi); - GLSLF(" float avg = %f * float(total_avg) / float(frame_num);\n", refi); - GLSLF(" sig_peak = max(1.0, peak);\n"); - GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg); - GLSL(}); + GLSL(barrier();) // Finally, to update the global state, we increment a counter per dispatch - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) + GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) GLSL(if 
(gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) - - // Since we sum up all the workgroups, we also still need to divide the - // average by the number of work groups GLSL( counter = 0;) - GLSL( frame_avg[frame_idx] /= num_wg;) - GLSL( uint cur_max = frame_max[frame_idx];) - GLSL( uint cur_avg = frame_avg[frame_idx];) - - // Scene change detection - GLSL( int diff = int(frame_num * cur_avg) - int(total_avg);) - GLSLF(" if (abs(diff) > frame_num * %d) {\n", scene_threshold); - GLSL( frame_num = 0;) - GLSL( total_max = total_avg = 0;) - GLSLF(" for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1); - GLSL( frame_max[i] = frame_avg[i] = 0;) - GLSL( frame_max[frame_idx] = cur_max;) - GLSL( frame_avg[frame_idx] = cur_avg;) - GLSL( }) - - // Add the current frame, then subtract and reset the next frame - GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); - GLSL( total_max += cur_max - frame_max[next];) - GLSL( total_avg += cur_avg - frame_avg[next];) - GLSL( frame_max[next] = frame_avg[next] = 0;) - - // Update the index and count - GLSL( frame_idx = next;) - GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES); + GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) + GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE); + + // Use an IIR low-pass filter to smooth out the detected values, with a + // configurable decay rate based on the desired time constant (tau) + float a = 1.0 - cos(1.0 / opts->decay_rate); + float decay = sqrt(a*a + 2*a) - a; + GLSLF(" average += %f * (cur - average);\n", decay); + + // Scene change hysteresis + GLSLF(" float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n", + (float) opts->scene_threshold_low / MP_REF_WHITE, + (float) opts->scene_threshold_high / MP_REF_WHITE); + GLSL( average = mix(average, cur, weight);) + + // Reset SSBO state for the next frame + GLSL( frame_max = frame_sum = 0;) GLSL( memoryBarrierBuffer();) GLSL(}) } @@ -659,7 +639,7 @@ static void 
pass_tone_map(struct gl_shader_cache *sc, GLSLF("float sig_avg = %f;\n", sdr_avg); if (opts->compute_peak >= 0) - hdr_update_peak(sc); + hdr_update_peak(sc, opts); GLSLF("vec3 sig = color.rgb;\n"); -- cgit v1.2.3 From 12e58ff8a65c537a222a3fb954f88d98a3a5bfd2 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Wed, 2 Jan 2019 03:03:38 +0100 Subject: vo_gpu: allow boosting dark scenes when tone mapping In theory our "eye adaptation" algorithm works in both ways, both darkening bright scenes and brightening dark scenes. But I've always just prevented the latter with a hard clamp, since I wanted to avoid blowing up dark scenes into looking funny (and full of noise). But allowing a tiny bit of over-exposure might be a good thing. I won't change the default just yet (better let users test), but a moderate value of 1.2 might be better than the current 1.0 limit. Needs testing especially on dark scenes. --- video/out/gpu/video.c | 2 ++ video/out/gpu/video.h | 1 + video/out/gpu/video_shaders.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index a29f09bc3d..6bf0bb31a1 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -316,6 +316,7 @@ static const struct gl_video_opts gl_video_opts_def = { .tone_map = { .curve = TONE_MAPPING_HABLE, .curve_param = NAN, + .max_boost = 1.0, .decay_rate = 100.0, .scene_threshold_low = 50, .scene_threshold_high = 200, @@ -376,6 +377,7 @@ const struct m_sub_options gl_video_conf = { OPT_INTRANGE("hdr-scene-threshold-high", tone_map.scene_threshold_high, 0, 0, 10000), OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0), + OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0), OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), OPT_FLOATRANGE("tone-mapping-desaturate-exponent", tone_map.desat_exp, 0, 0.0, 20.0), diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index 077f69332f..0bd5c57e8f 100644 --- 
a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -98,6 +98,7 @@ enum tone_mapping { struct gl_tone_map_opts { int curve; float curve_param; + float max_boost; int compute_peak; float decay_rate; int scene_threshold_low; diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 0fff8f05f2..fbccd56eb3 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -652,7 +652,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, } GLSL(float sig_orig = sig[sig_idx];) - GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg); + GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg); GLSL(sig *= slope;) GLSL(sig_peak *= slope;) -- cgit v1.2.3 From fdd671188d7edb8d150ec2c93656fb80bf031f12 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Wed, 2 Jan 2019 07:18:29 +0100 Subject: vo_gpu: improve accuracy of HDR brightness estimation This change switches to a logarithmic mean to estimate the average signal brightness. This handles dark scenes with isolated highlights much more faithfully than the linear mean did, since the log of the signal roughly corresponds to the perceptual brightness. 
--- video/out/gpu/video.c | 4 ++-- video/out/gpu/video_shaders.c | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 6bf0bb31a1..be49551dfb 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -2494,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool if (detect_peak && !p->hdr_peak_ssbo) { struct { float average[2]; - uint32_t frame_sum; + int32_t frame_sum; uint32_t frame_max; uint32_t counter; } peak_ssbo = { @@ -2520,7 +2520,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool pass_is_compute(p, 8, 8, true); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, "vec2 average;" - "uint frame_sum;" + "int frame_sum;" "uint frame_max;" "uint counter;" ); diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index fbccd56eb3..127db58ea2 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -574,21 +574,24 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL(sig_avg = max(1e-3, average.x);) GLSL(sig_peak = max(1.00, average.y);) + // Chosen to avoid overflowing on an 8K buffer + const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0; + // For performance, and to avoid overflows, we tally up the sub-results per // pixel using shared memory first - GLSLH(shared uint wg_sum;) + GLSLH(shared int wg_sum;) GLSLH(shared uint wg_max;) - GLSL(wg_sum = wg_max = 0;) + GLSL(wg_sum = 0; wg_max = 0;) GLSL(barrier();) - GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE); - GLSL(atomicAdd(wg_sum, sig_uint);) - GLSL(atomicMax(wg_max, sig_uint);) + GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); + GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); + GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale); // Have one thread per work group update the global 
atomics GLSL(memoryBarrierShared();) GLSL(barrier();) GLSL(if (gl_LocalInvocationIndex == 0) {) - GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);) + GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);) GLSL( atomicAdd(frame_sum, wg_avg);) GLSL( atomicMax(frame_max, wg_max);) GLSL( memoryBarrierBuffer();) @@ -600,7 +603,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) GLSL( counter = 0;) GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) - GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE); + GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); + GLSL( cur.x = exp(cur.x);) // Use an IIR low-pass filter to smooth out the detected values, with a // configurable decay rate based on the desired time constant (tau) @@ -615,7 +619,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL( average = mix(average, cur, weight);) // Reset SSBO state for the next frame - GLSL( frame_max = frame_sum = 0;) + GLSL( frame_sum = 0; frame_max = 0;) GLSL( memoryBarrierBuffer();) GLSL(}) } -- cgit v1.2.3 From 4e8022da269d02c3bb23e4e119e4b1dc9aa3f3e4 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 3 Jan 2019 00:44:15 +0100 Subject: vo_gpu: allow color management in dumb mode There's no point to disallow target-trc/prim in dumb mode, since they still work fine. 
--- video/out/gpu/video.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index be49551dfb..5e805019ea 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -3513,9 +3513,9 @@ static bool check_dumb_mode(struct gl_video *p) return false; // otherwise, use auto-detection - if (o->target_prim || o->target_trc || o->correct_downscaling || - o->linear_downscaling || o->linear_upscaling || o->sigmoid_upscaling || - o->interpolation || o->blend_subs || o->deband || o->unsharp) + if (o->correct_downscaling || o->linear_downscaling || + o->linear_upscaling || o->sigmoid_upscaling || o->interpolation || + o->blend_subs || o->deband || o->unsharp) return false; // check remaining scalers (tscale is already implicitly excluded above) for (int i = 0; i < SCALER_COUNT; i++) { @@ -3527,8 +3527,6 @@ static bool check_dumb_mode(struct gl_video *p) } if (o->user_shaders && o->user_shaders[0]) return false; - if (p->use_lut_3d) - return false; return true; } @@ -3631,6 +3629,9 @@ static void check_gl_features(struct gl_video *p) .early_flush = p->opts.early_flush, .icc_opts = p->opts.icc_opts, .hwdec_interop = p->opts.hwdec_interop, + .target_trc = p->opts.target_trc, + .target_prim = p->opts.target_prim, + .target_peak = p->opts.target_peak, }; for (int n = 0; n < SCALER_COUNT; n++) p->opts.scaler[n] = gl_video_opts_def.scaler[n]; -- cgit v1.2.3 From 11b58415d51e14760ffb0302d9c6d86a504a2b57 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 3 Jan 2019 01:29:08 +0100 Subject: vo_gpu: improve numerical accuracy of PQ OETF constant Not a huge deal, but we can do the division in C, which makes the float constant larger. 
--- video/out/gpu/video_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'video') diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 127db58ea2..3b6c9d01af 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -380,7 +380,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" " / (vec3(%f) - vec3(%f) * color.rgb);\n", PQ_C1, PQ_C2, PQ_C3); - GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1); + GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1); // PQ's output range is 0-10000, but we need it to be relative to to // MP_REF_WHITE instead, so rescale GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); -- cgit v1.2.3 From 677ae4f8fe5c9896bc7b7b174e75400c15afc146 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 3 Jan 2019 01:39:23 +0100 Subject: vo_gpu: make --gamut-warning warn on negative colors As is the case for actually out-of-gamut colors (rather than just too bright colors). 
--- video/out/gpu/video_shaders.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'video') diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 3b6c9d01af..806f0e17dd 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -813,7 +813,8 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear, // Warn for remaining out-of-gamut colors is enabled if (opts->gamut_warning) { - GLSL(if (any(greaterThan(color.rgb, vec3(1.01))))) + GLSL(if (any(greaterThan(color.rgb, vec3(1.01))) || + any(lessThan(color.rgb, vec3(0.0))))) GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert } -- cgit v1.2.3 From 258ed5d471334ef756563a5384540c063697f3b3 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 3 Jan 2019 01:40:08 +0100 Subject: vo_gpu: tone map before gamut mapping Gamut mapping can take very bright out-of-gamut colors into the negatives, which completely destroys the color balance (which tone mapping tries its best to preserve). 
--- video/out/gpu/video_shaders.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 806f0e17dd..07ac0b940f 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -784,6 +784,10 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear, if (need_ootf) pass_ootf(sc, src.light, src.sig_peak); + // Tone map to prevent clipping due to excessive brightness + if (src.sig_peak > dst.sig_peak) + pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts); + // Adapt to the right colorspace if necessary if (src.primaries != dst.primaries) { struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), @@ -794,11 +798,6 @@ void pass_color_map(struct gl_shader_cache *sc, bool is_linear, GLSL(color.rgb = cms_matrix * color.rgb;) } - // Tone map to prevent clipping when the source signal peak exceeds the - // encodable range or we've reduced the gamut - if (src.sig_peak > dst.sig_peak) - pass_tone_map(sc, src.sig_peak, dst.sig_peak, opts); - if (need_ootf) pass_inverse_ootf(sc, dst.light, dst.sig_peak); -- cgit v1.2.3 From b4b719e33748970a9bf98a82a017d8f149ecb557 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 3 Jan 2019 17:18:58 +0100 Subject: vo_gpu: clamp sigmoid function Can explode on some clips otherwise --- video/out/gpu/video.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 5e805019ea..df357b3552 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -2381,6 +2381,7 @@ static void pass_scale_main(struct gl_video *p) // values at 1 and 0, and then scale/shift them, respectively. 
sig_offset = 1.0/(1+expf(sig_slope * sig_center)); sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n", sig_center, sig_scale, sig_offset, sig_slope); pass_opt_hook_point(p, "SIGMOID", NULL); @@ -2408,6 +2409,7 @@ static void pass_scale_main(struct gl_video *p) GLSLF("// scaler post-conversion\n"); if (use_sigmoid) { // Inverse of the transformation above + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n", sig_slope, sig_center, sig_offset, sig_scale); } -- cgit v1.2.3 From 3f1bc25d4de6150b0acff7e92d3e3084a7d989f0 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Fri, 4 Jan 2019 16:46:38 +0100 Subject: vo_gpu: use dB units for scene change detection Rather than the linear cd/m^2 units, these (relative) logarithmic units lend themselves much better to actually detecting scene changes, especially since the scene averaging was changed to also work logarithmically. 
--- video/out/gpu/video.c | 12 ++++++------ video/out/gpu/video.h | 4 ++-- video/out/gpu/video_shaders.c | 7 ++++--- 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index df357b3552..24e6990139 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -318,8 +318,8 @@ static const struct gl_video_opts gl_video_opts_def = { .curve_param = NAN, .max_boost = 1.0, .decay_rate = 100.0, - .scene_threshold_low = 50, - .scene_threshold_high = 200, + .scene_threshold_low = 5.5, + .scene_threshold_high = 10.0, .desat = 0.75, .desat_exp = 1.5, }, @@ -372,10 +372,10 @@ const struct m_sub_options gl_video_conf = { {"yes", 1}, {"no", -1})), OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0), - OPT_INTRANGE("hdr-scene-threshold-low", - tone_map.scene_threshold_low, 0, 0, 10000), - OPT_INTRANGE("hdr-scene-threshold-high", - tone_map.scene_threshold_high, 0, 0, 10000), + OPT_FLOATRANGE("hdr-scene-threshold-low", + tone_map.scene_threshold_low, 0, 0, 20.0), + OPT_FLOATRANGE("hdr-scene-threshold-high", + tone_map.scene_threshold_high, 0, 0, 20.0), OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0), OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0), OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index 0bd5c57e8f..1b0994ac78 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -101,8 +101,8 @@ struct gl_tone_map_opts { float max_boost; int compute_peak; float decay_rate; - int scene_threshold_low; - int scene_threshold_high; + float scene_threshold_low; + float scene_threshold_high; float desat; float desat_exp; int gamut_warning; // bool diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 07ac0b940f..5fea739385 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -613,9 +613,10 @@ static void 
hdr_update_peak(struct gl_shader_cache *sc, GLSLF(" average += %f * (cur - average);\n", decay); // Scene change hysteresis - GLSLF(" float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n", - (float) opts->scene_threshold_low / MP_REF_WHITE, - (float) opts->scene_threshold_high / MP_REF_WHITE); + float log_db = 10.0 / log(10.0); + GLSLF(" float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n", + opts->scene_threshold_low / log_db, + opts->scene_threshold_high / log_db); GLSL( average = mix(average, cur, weight);) // Reset SSBO state for the next frame -- cgit v1.2.3 From 8b563a034604ff5ab2ad92d12c63e806f45d1bb6 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Wed, 9 Jan 2019 17:14:19 +0100 Subject: vo_gpu: fix initial seeding of the peak detect ssbo This solves some edge cases when using files with very weird metadata (e.g. MaxCLL 10k and so forth). Instead of just blindly seeding it with the tagged metadata, forcibly set the initial state from the detected values. --- video/out/gpu/video.c | 4 +--- video/out/gpu/video_shaders.c | 8 ++++++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'video') diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 24e6990139..593f5fb9c1 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -2499,9 +2499,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool int32_t frame_sum; uint32_t frame_max; uint32_t counter; - } peak_ssbo = { - .average = { 0.25, src.sig_peak }, - }; + } peak_ssbo = {0}; struct ra_buf_params params = { .type = RA_BUF_TYPE_SHADER_STORAGE, diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 5fea739385..b34aa90bfa 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -571,8 +571,10 @@ static void hdr_update_peak(struct gl_shader_cache *sc, const struct gl_tone_map_opts *opts) { // Update the sig_peak/sig_avg from the old SSBO state - GLSL(sig_avg = max(1e-3, average.x);) 
- GLSL(sig_peak = max(1.00, average.y);) + GLSL(if (average.y > 0.0) {) + GLSL( sig_avg = max(1e-3, average.x);) + GLSL( sig_peak = max(1.00, average.y);) + GLSL(}) // Chosen to avoid overflowing on an 8K buffer const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0; @@ -605,6 +607,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); GLSL( cur.x = exp(cur.x);) + GLSL( if (average.y == 0.0)) + GLSL( average = cur;) // Use an IIR low-pass filter to smooth out the detected values, with a // configurable decay rate based on the desired time constant (tau) -- cgit v1.2.3 From ae115bd8d8535bd4d40438d1bf7c380a6731d6a4 Mon Sep 17 00:00:00 2001 From: Anton Kindestam Date: Tue, 16 Oct 2018 17:51:47 +0200 Subject: opengl: Support GL_ARB_sync style fences on OpenGL ES 3.0 OpenGL ES 3.0 and up has support for GL_ARB_sync style fences. Make sure that mpv can use them. --- video/out/opengl/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'video') diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index 00692f0350..86d0eec404 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -262,6 +262,7 @@ static const struct gl_functions gl_functions[] = { }, { .ver_core = 320, + .ver_es_core = 300, .extension = "GL_ARB_sync", .functions = (const struct gl_function[]) { DEF_FN(FenceSync), -- cgit v1.2.3 From 537006965e98255deb6c433c3f715738d9092946 Mon Sep 17 00:00:00 2001 From: Anton Kindestam Date: Tue, 27 Nov 2018 18:39:16 +0100 Subject: context_drm_egl: implement n-buffering This allows context_drm_egl to use as many buffers as libgbm or the swapchain_depth setting allows (whichever is smaller). On pause and on still images (cover art etc.)
to make sure that output does not lag behind user input, the swapchain is drained and reverts to working in a dual buffered (equivalent to swapchain-depth=1) manner. When possible (swapchain-depth>=2), the wait on the page flip event is now not done immediately after queueing, but is deferred to the next invocation of swap_buffers, which should give us more CPU time between invocations. Although, since gbm_surface_has_free_buffers() can only tell us a boolean value and not how many buffers we have left, we are forced to do this contortionist dance where we first overshoot until gbm_surface_has_free_buffers() reports 0, followed by immediately waiting so we can free a buffer, to be able to get the deferred wait on page flip rolling. With this commit we do not rely on the default vsync fences/latency emulation of video/out/opengl/context.c, but supply our own, since the places we create and wait for the fences need to be somewhat different for best performance. Minor fixes: * According to GBM documentation all BOs obtained with gbm_surface_lock_front_buffer must be released before gbm_surface_destroy is called on the surface. * We let the page flip handler function handle the waiting_for_flip flag.
--- video/out/opengl/context_drm_egl.c | 209 ++++++++++++++++++++++++++----------- 1 file changed, 150 insertions(+), 59 deletions(-) (limited to 'video') diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c index 6aa3d95e79..7e14155d28 100644 --- a/video/out/opengl/context_drm_egl.c +++ b/video/out/opengl/context_drm_egl.c @@ -50,8 +50,8 @@ struct gbm { struct gbm_surface *surface; struct gbm_device *device; - struct gbm_bo *bo; - struct gbm_bo *next_bo; + struct gbm_bo **bo; + unsigned int num_bos; }; struct egl @@ -72,6 +72,9 @@ struct priv { struct gbm gbm; struct framebuffer *fb; + GLsync *vsync_fences; + unsigned int num_vsync_fences; + uint32_t gbm_format; bool active; @@ -80,6 +83,9 @@ struct priv { bool vt_switcher_active; struct vt_switcher vt_switcher; + bool still; + bool paused; + struct mpv_opengl_drm_params drm_params; struct mpv_opengl_drm_draw_surface_size draw_surface_size; }; @@ -355,15 +361,6 @@ static void crtc_release(struct ra_ctx *ctx) return; p->active = false; - // wait for current page flip - while (p->waiting_for_flip) { - int ret = drmHandleEvent(p->kms->fd, &p->ev); - if (ret) { - MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); - break; - } - } - if (p->kms->atomic_context) { if (p->kms->atomic_context->old_state.saved) { if (!crtc_release_atomic(ctx)) @@ -414,44 +411,19 @@ static void acquire_vt(void *data) c