diff options
Diffstat (limited to 'video/out/gpu/video.c')
-rw-r--r-- | video/out/gpu/video.c | 994 |
1 files changed, 574 insertions, 420 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 88a8557cc2..1478ec4687 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -29,6 +29,7 @@ #include "misc/bstr.h" #include "options/m_config.h" +#include "options/path.h" #include "common/global.h" #include "options/options.h" #include "utils.h" @@ -182,6 +183,7 @@ struct gl_video { struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) + struct mp_image_params target_params; // target format struct ra_imgfmt_desc ra_format; // texture format int plane_count; @@ -211,6 +213,7 @@ struct gl_video { struct ra_tex *merge_tex[4]; struct ra_tex *scale_tex[4]; struct ra_tex *integer_tex[4]; + struct ra_tex *chroma_tex[4]; struct ra_tex *indirect_tex; struct ra_tex *blend_subs_tex; struct ra_tex *error_diffusion_tex[2]; @@ -280,10 +283,7 @@ struct gl_video { struct cached_file *files; int num_files; - bool hwdec_interop_loading_done; - struct ra_hwdec **hwdecs; - int num_hwdecs; - + struct ra_hwdec_ctx hwdec_ctx; struct ra_hwdec_mapper *hwdec_mapper; struct ra_hwdec *hwdec_overlay; bool hwdec_active; @@ -292,11 +292,11 @@ struct gl_video { bool broken_frame; // temporary error state bool colorspace_override_warned; + bool correct_downscaling_warned; }; static const struct gl_video_opts gl_video_opts_def = { .dither_algo = DITHER_FRUIT, - .dither_depth = -1, .dither_size = 6, .temporal_dither_period = 1, .error_diffusion = "sierra-lite", @@ -304,164 +304,182 @@ static const struct gl_video_opts gl_video_opts_def = { .sigmoid_center = 0.75, .sigmoid_slope = 6.5, .scaler = { - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // scale - {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // dscale - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // cscale - {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .clamp = 1, }, // 
tscale + {{"lanczos", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale + {{"hermite", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale + {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale + {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale }, - .scaler_resizes_only = 1, - .scaler_lut_size = 6, - .interpolation_threshold = 0.0001, - .alpha_mode = ALPHA_BLEND_TILES, - .background = {0, 0, 0, 255}, + .scaler_resizes_only = true, + .correct_downscaling = true, + .linear_downscaling = true, + .sigmoid_upscaling = true, + .interpolation_threshold = 0.01, + .background = BACKGROUND_TILES, + .background_color = {0, 0, 0, 255}, .gamma = 1.0f, .tone_map = { - .curve = TONE_MAPPING_HABLE, + .curve = TONE_MAPPING_AUTO, .curve_param = NAN, .max_boost = 1.0, - .decay_rate = 100.0, - .scene_threshold_low = 5.5, - .scene_threshold_high = 10.0, - .desat = 0.75, - .desat_exp = 1.5, + .decay_rate = 20.0, + .scene_threshold_low = 1.0, + .scene_threshold_high = 3.0, + .contrast_smoothness = 3.5, }, .early_flush = -1, + .shader_cache = true, .hwdec_interop = "auto", }; -static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -static int validate_window_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); +static OPT_STRING_VALIDATE_FUNC(validate_scaler_opt); +static OPT_STRING_VALIDATE_FUNC(validate_window_opt); +static OPT_STRING_VALIDATE_FUNC(validate_error_diffusion_opt); #define OPT_BASE_STRUCT struct gl_video_opts // Use for options which use NAN for defaults. 
-#define OPT_FLOATDEF(name, var, flags) \ - OPT_FLOAT(name, var, (flags) | M_OPT_DEFAULT_NAN) +#define OPT_FLOATDEF(field) \ + OPT_FLOAT(field), \ + .flags = M_OPT_DEFAULT_NAN #define SCALER_OPTS(n, i) \ - OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \ - OPT_FLOATDEF(n"-param1", scaler[i].kernel.params[0], 0), \ - OPT_FLOATDEF(n"-param2", scaler[i].kernel.params[1], 0), \ - OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \ - OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \ - OPT_FLOATDEF(n"-wparam", scaler[i].window.params[0], 0), \ - OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \ - OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \ - OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \ - OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt) + {n, OPT_STRING_VALIDATE(scaler[i].kernel.name, validate_scaler_opt)}, \ + {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])}, \ + {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])}, \ + {n"-blur", OPT_FLOAT(scaler[i].kernel.blur)}, \ + {n"-cutoff", OPT_REMOVED("Hard-coded as 0.001")}, \ + {n"-taper", OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)}, \ + {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])}, \ + {n"-wblur", OPT_REMOVED("Just adjust filter radius directly")}, \ + {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)}, \ + {n"-clamp", OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)}, \ + {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)}, \ + {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)}, \ + {n"-window", OPT_STRING_VALIDATE(scaler[i].window.name, validate_window_opt)} const struct m_sub_options gl_video_conf = { .opts = (const m_option_t[]) { - OPT_CHOICE("gpu-dumb-mode", 
dumb_mode, 0, - ({"auto", 0}, {"yes", 1}, {"no", -1})), - OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0), - OPT_FLAG("gamma-auto", gamma_auto, 0), - OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), - OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), - OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000, - ({"auto", 0})), - OPT_CHOICE("tone-mapping", tone_map.curve, 0, - ({"clip", TONE_MAPPING_CLIP}, - {"mobius", TONE_MAPPING_MOBIUS}, - {"reinhard", TONE_MAPPING_REINHARD}, - {"hable", TONE_MAPPING_HABLE}, - {"gamma", TONE_MAPPING_GAMMA}, - {"linear", TONE_MAPPING_LINEAR})), - OPT_CHOICE("hdr-compute-peak", tone_map.compute_peak, 0, - ({"auto", 0}, - {"yes", 1}, - {"no", -1})), - OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0), - OPT_FLOATRANGE("hdr-scene-threshold-low", - tone_map.scene_threshold_low, 0, 0, 20.0), - OPT_FLOATRANGE("hdr-scene-threshold-high", - tone_map.scene_threshold_high, 0, 0, 20.0), - OPT_FLOATDEF("tone-mapping-param", tone_map.curve_param, 0), - OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0), - OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0), - OPT_FLOATRANGE("tone-mapping-desaturate-exponent", - tone_map.desat_exp, 0, 0.0, 20.0), - OPT_FLAG("gamut-warning", tone_map.gamut_warning, 0), - OPT_FLAG("opengl-pbo", pbo, 0), + {"gpu-dumb-mode", OPT_CHOICE(dumb_mode, + {"auto", 0}, {"yes", 1}, {"no", -1})}, + {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0), + .deprecation_message = "no replacement"}, + {"gamma-auto", OPT_BOOL(gamma_auto), + .deprecation_message = "no replacement"}, + {"target-prim", OPT_CHOICE_C(target_prim, pl_csp_prim_names)}, + {"target-trc", OPT_CHOICE_C(target_trc, pl_csp_trc_names)}, + {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}), + M_RANGE(10, 10000)}, + {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}), + M_RANGE(10, 1000000)}, + {"target-gamut", OPT_CHOICE_C(target_gamut, 
pl_csp_prim_names)}, + {"tone-mapping", OPT_CHOICE(tone_map.curve, + {"auto", TONE_MAPPING_AUTO}, + {"clip", TONE_MAPPING_CLIP}, + {"mobius", TONE_MAPPING_MOBIUS}, + {"reinhard", TONE_MAPPING_REINHARD}, + {"hable", TONE_MAPPING_HABLE}, + {"gamma", TONE_MAPPING_GAMMA}, + {"linear", TONE_MAPPING_LINEAR}, + {"spline", TONE_MAPPING_SPLINE}, + {"bt.2390", TONE_MAPPING_BT_2390}, + {"bt.2446a", TONE_MAPPING_BT_2446A}, + {"st2094-40", TONE_MAPPING_ST2094_40}, + {"st2094-10", TONE_MAPPING_ST2094_10})}, + {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)}, + {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)}, + {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost), + M_RANGE(1.0, 10.0)}, + {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)}, + {"gamut-mapping-mode", OPT_CHOICE(tone_map.gamut_mode, + {"auto", GAMUT_AUTO}, + {"clip", GAMUT_CLIP}, + {"perceptual", GAMUT_PERCEPTUAL}, + {"relative", GAMUT_RELATIVE}, + {"saturation", GAMUT_SATURATION}, + {"absolute", GAMUT_ABSOLUTE}, + {"desaturate", GAMUT_DESATURATE}, + {"darken", GAMUT_DARKEN}, + {"warn", GAMUT_WARN}, + {"linear", GAMUT_LINEAR})}, + {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak, + {"auto", 0}, + {"yes", 1}, + {"no", -1})}, + {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile), + M_RANGE(0.0, 100.0)}, + {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate), + M_RANGE(0.0, 1000.0)}, + {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low), + M_RANGE(0, 20.0)}, + {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high), + M_RANGE(0, 20.0)}, + {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery), + M_RANGE(0, 2.0)}, + {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness), + M_RANGE(1.0, 100.0)}, + {"opengl-pbo", OPT_BOOL(pbo)}, SCALER_OPTS("scale", SCALER_SCALE), SCALER_OPTS("dscale", SCALER_DSCALE), SCALER_OPTS("cscale", SCALER_CSCALE), SCALER_OPTS("tscale", SCALER_TSCALE), - OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 
10), - OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), - OPT_FLAG("correct-downscaling", correct_downscaling, 0), - OPT_FLAG("linear-downscaling", linear_downscaling, 0), - OPT_FLAG("linear-upscaling", linear_upscaling, 0), - OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), - OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), - OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), - OPT_STRING("fbo-format", fbo_format, 0), - OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, - ({"no", -1}, {"auto", 0})), - OPT_CHOICE("dither", dither_algo, 0, - ({"fruit", DITHER_FRUIT}, - {"ordered", DITHER_ORDERED}, - {"error-diffusion", DITHER_ERROR_DIFFUSION}, - {"no", DITHER_NONE})), - OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8), - OPT_FLAG("temporal-dither", temporal_dither, 0), - OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128), - OPT_STRING_VALIDATE("error-diffusion", error_diffusion, 0, - validate_error_diffusion_opt), - OPT_CHOICE("alpha", alpha_mode, 0, - ({"no", ALPHA_NO}, - {"yes", ALPHA_YES}, - {"blend", ALPHA_BLEND}, - {"blend-tiles", ALPHA_BLEND_TILES})), - OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0), - OPT_COLOR("background", background, 0), - OPT_FLAG("interpolation", interpolation, 0), - OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0), - OPT_CHOICE("blend-subtitles", blend_subs, 0, - ({"no", BLEND_SUBS_NO}, - {"yes", BLEND_SUBS_YES}, - {"video", BLEND_SUBS_VIDEO})), - OPT_PATHLIST("glsl-shaders", user_shaders, M_OPT_FILE), - OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"), - OPT_FLAG("deband", deband, 0), - OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), - OPT_FLOAT("sharpen", unsharp, 0), - OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096), - OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096), - OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), - OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, M_OPT_FILE), - 
OPT_STRING_VALIDATE("gpu-hwdec-interop", hwdec_interop, 0, - ra_hwdec_validate_opt), - OPT_REPLACED("opengl-hwdec-interop", "gpu-hwdec-interop"), - OPT_REPLACED("hwdec-preload", "opengl-hwdec-interop"), - OPT_REPLACED("hdr-tone-mapping", "tone-mapping"), - OPT_REPLACED("opengl-shaders", "glsl-shaders"), - OPT_REPLACED("opengl-shader", "glsl-shader"), - OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"), - OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"), - OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"), - OPT_REPLACED("opengl-fbo-format", "fbo-format"), - OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"), - OPT_REPLACED("opengl-gamma", "gamma-factor"), - OPT_REMOVED("linear-scaling", "Split into --linear-upscaling and " - "--linear-downscaling"), + {"scaler-lut-size", OPT_REMOVED("hard-coded as 8")}, + {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)}, + {"correct-downscaling", OPT_BOOL(correct_downscaling)}, + {"linear-downscaling", OPT_BOOL(linear_downscaling)}, + {"linear-upscaling", OPT_BOOL(linear_upscaling)}, + {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)}, + {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)}, + {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)}, + {"fbo-format", OPT_STRING(fbo_format)}, + {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}), + M_RANGE(-1, 16)}, + {"dither", OPT_CHOICE(dither_algo, + {"fruit", DITHER_FRUIT}, + {"ordered", DITHER_ORDERED}, + {"error-diffusion", DITHER_ERROR_DIFFUSION}, + {"no", DITHER_NONE})}, + {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)}, + {"temporal-dither", OPT_BOOL(temporal_dither)}, + {"temporal-dither-period", OPT_INT(temporal_dither_period), + M_RANGE(1, 128)}, + {"error-diffusion", + OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)}, + {"background", OPT_CHOICE(background, + {"none", BACKGROUND_NONE}, + {"color", BACKGROUND_COLOR}, + {"tiles", BACKGROUND_TILES})}, + {"opengl-rectangle-textures", 
OPT_BOOL(use_rectangle)}, + {"background-color", OPT_COLOR(background_color)}, + {"interpolation", OPT_BOOL(interpolation)}, + {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)}, + {"blend-subtitles", OPT_CHOICE(blend_subs, + {"no", BLEND_SUBS_NO}, + {"yes", BLEND_SUBS_YES}, + {"video", BLEND_SUBS_VIDEO})}, + {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE}, + {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")}, + {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)}, + {"deband", OPT_BOOL(deband)}, + {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)}, + {"sharpen", OPT_FLOAT(unsharp)}, + {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)}, + {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)}, + {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)}, + {"gpu-shader-cache", OPT_BOOL(shader_cache)}, + {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE}, + {"gpu-hwdec-interop", + OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)}, + {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")}, + {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")}, + {"tone-mapping-desaturate", OPT_REMOVED("Replaced by --tone-mapping-mode")}, + {"tone-mapping-desaturate-exponent", OPT_REMOVED("Replaced by --tone-mapping-mode")}, + {"tone-mapping-crosstalk", OPT_REMOVED("Hard-coded as 0.04")}, + {"tone-mapping-mode", OPT_REMOVED("no replacement")}, {0} }, .size = sizeof(struct gl_video_opts), .defaults = &gl_video_opts_def, + .change_flags = UPDATE_VIDEO, }; static void uninit_rendering(struct gl_video *p); @@ -488,7 +506,9 @@ static struct bstr load_cached_file(struct gl_video *p, const char *path) return p->files[n].body; } // not found -> load it - struct bstr s = stream_read_file(path, p, p->global, 1000000000); // 1GB + char *fname = mp_get_user_path(NULL, p->global, path); + struct bstr s = stream_read_file(fname, p, p->global, 1000000000); // 1GB + talloc_free(fname); if (s.len) 
{ struct cached_file new = { .path = talloc_strdup(p, path), @@ -555,6 +575,7 @@ static void uninit_rendering(struct gl_video *p) ra_tex_free(p->ra, &p->merge_tex[n]); ra_tex_free(p->ra, &p->scale_tex[n]); ra_tex_free(p->ra, &p->integer_tex[n]); + ra_tex_free(p->ra, &p->chroma_tex[n]); } ra_tex_free(p->ra, &p->indirect_tex); @@ -582,15 +603,6 @@ bool gl_video_gamma_auto_enabled(struct gl_video *p) return p->opts.gamma_auto; } -struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) -{ - return (struct mp_colorspace) { - .primaries = p->opts.target_prim, - .gamma = p->opts.target_trc, - .sig_peak = p->opts.target_peak / MP_REF_WHITE, - }; -} - // Warning: profile.start must point to a ta allocation, and the function // takes over ownership. void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) @@ -604,8 +616,8 @@ bool gl_video_icc_auto_enabled(struct gl_video *p) return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; } -static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, - enum mp_csp_trc trc) +static bool gl_video_get_lut3d(struct gl_video *p, enum pl_color_primaries prim, + enum pl_color_transfer trc) { if (!p->use_lut_3d) return false; @@ -653,6 +665,11 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, talloc_free(lut3d); + if (!p->lut_3d_texture) { + p->use_lut_3d = false; + return false; + } + return true; } @@ -743,16 +760,16 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; - if (p->image_params.chroma_location != MP_CHROMA_CENTER) { - int cx, cy; - mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); + if (p->image_params.chroma_location != PL_CHROMA_CENTER) { + float cx, cy; + pl_chroma_location_offset(p->image_params.chroma_location, &cx, &cy); // By default texture coordinates are such that chroma is centered with // any chroma subsampling. 
If a specific direction is given, make it // so that the luma and chroma sample line up exactly. // For 4:4:4, setting chroma location should have no effect at all. // luma sample size (in chroma coord. space) - chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + chroma.t[0] = ls_w < 1 ? ls_w * -cx : 0; + chroma.t[1] = ls_h < 1 ? ls_h * -cy : 0; } memset(img, 0, 4 * sizeof(img[0])); @@ -768,9 +785,9 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, ctype = PLANE_NONE; } else if (c == 4) { ctype = PLANE_ALPHA; - } else if (p->image_params.color.space == MP_CSP_RGB) { + } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_RGB) { ctype = PLANE_RGB; - } else if (p->image_params.color.space == MP_CSP_XYZ) { + } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_XYZ) { ctype = PLANE_XYZ; } else { ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA; @@ -782,9 +799,11 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, int msb_valid_bits = p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); - int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space; + int csp = type == PLANE_ALPHA ? PL_COLOR_SYSTEM_RGB : p->image_params.repr.sys; float tex_mul = 1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits); + if (p->ra_format.component_type == RA_CTYPE_FLOAT) + tex_mul = 1.0; img[n] = (struct image){ .type = type, @@ -854,14 +873,7 @@ static void init_video(struct gl_video *p) { p->use_integer_conversion = false; - struct ra_hwdec *hwdec = NULL; - for (int n = 0; n < p->num_hwdecs; n++) { - if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) { - hwdec = p->hwdecs[n]; - break; - } - } - + struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt); if (hwdec) { if (hwdec->driver->overlay_frame) { MP_WARN(p, "Using HW-overlay mode. 
No GL filtering is performed " @@ -939,9 +951,6 @@ static void init_video(struct gl_video *p) params.w, params.h); plane->tex = ra_tex_create(p->ra, &params); - if (!plane->tex) - abort(); // shit happens - p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; } } @@ -1045,13 +1054,13 @@ static void uninit_video(struct gl_video *p) ra_hwdec_mapper_free(&p->hwdec_mapper); } -static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +static void pass_record(struct gl_video *p, const struct mp_pass_perf *perf) { if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) return; struct pass_info *pass = &p->pass[p->pass_idx]; - pass->perf = perf; + pass->perf = *perf; if (pass->desc.len == 0) bstr_xappend(p, &pass->desc, bstr0("(unknown)")); @@ -1178,16 +1187,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h, if (!s->tex) continue; - // We need to rescale the coordinates to the true texture size - char *tex_scale = mp_tprintf(32, "tex_scale%d", n); - gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){ - (float)s->w / s->tex->params.w, - (float)s->h / s->tex->params.h, - }); - - PRELUDE("#define texmap%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); - PRELUDE("#define texmap%d(id) (texture_rot%d * texmap%d_raw(id) + " - "pixel_size%d * texture_off%d)\n", n, n, n, n, n); + PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + " + "pixel_size%d * texture_off%d)\n", n, n, n, n); PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); } @@ -1199,12 +1200,13 @@ static void dispatch_compute(struct gl_video *p, int w, int h, if (!(p->ra->caps & RA_CAP_NUM_GROUPS)) PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y); - pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); + struct mp_pass_perf perf = gl_sc_dispatch_compute(p->sc, num_x, num_y, 1); + pass_record(p, &perf); cleanup_binds(p); } static struct mp_pass_perf render_pass_quad(struct gl_video *p, - struct ra_fbo fbo, bool discard, + const struct 
ra_fbo *fbo, bool discard, const struct mp_rect *dst) { // The first element is reserved for `vec2 position` @@ -1262,15 +1264,16 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p, &p->tmp_vertex[num_vertex_attribs * 1], vertex_stride); - return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs, + return gl_sc_dispatch_draw(p->sc, fbo->tex, discard, p->vao, num_vertex_attribs, vertex_stride, p->tmp_vertex, num_vertices); } -static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo, +static void finish_pass_fbo(struct gl_video *p, const struct ra_fbo *fbo, bool discard, const struct mp_rect *dst) { pass_prepare_src_tex(p); - pass_record(p, render_pass_quad(p, fbo, discard, dst)); + struct mp_pass_perf perf = render_pass_quad(p, fbo, discard, dst); + pass_record(p, &perf); debug_check_gl(p, "after rendering"); cleanup_binds(p); } @@ -1290,8 +1293,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders // over fragment shaders wherever possible. - if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE)) + if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) && + (*dst_tex)->params.storage_dst) + { pass_is_compute(p, 16, 16, true); + } if (p->pass_compute.active) { gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex); @@ -1304,7 +1310,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, debug_check_gl(p, "after dispatching compute shader"); } else { struct ra_fbo fbo = { .tex = *dst_tex, }; - finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h}); + finish_pass_fbo(p, &fbo, true, &(struct mp_rect){0, 0, w, h}); } } @@ -1317,18 +1323,20 @@ static const char *get_tex_swizzle(struct image *img) // Copy a texture to the vec4 color, while increasing offset. 
Also applies // the texture multiplier to the sampled color -static void copy_image(struct gl_video *p, int *offset, struct image img) +static void copy_image(struct gl_video *p, unsigned int *offset, struct image img) { - int count = img.components; - assert(*offset + count <= 4); - assert(img.padding + count <= 4); - - int id = pass_bind(p, img); + const unsigned int count = img.components; char src[5] = {0}; char dst[5] = {0}; + + assert(*offset + count < sizeof(dst)); + assert(img.padding + count < sizeof(src)); + + int id = pass_bind(p, img); + const char *tex_fmt = get_tex_swizzle(&img); const char *dst_fmt = "rgba"; - for (int i = 0; i < count; i++) { + for (unsigned int i = 0; i < count; i++) { src[i] = tex_fmt[img.padding + i]; dst[i] = dst_fmt[*offset + i]; } @@ -1387,6 +1395,11 @@ static void hook_prelude(struct gl_video *p, const char *name, int id, GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", name, name, name, crap); + if (p->ra->caps & RA_CAP_GATHER) { + GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4(" + "textureGather(%s_raw, pos, c)))\n", name, name, name); + } + // Since the extra matrix multiplication impacts performance, // skip it unless the texture was actually rotated if (gl_transform_eq(img.transform, identity_trans)) { @@ -1680,6 +1693,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, double scale_factor, int sizes[]) { + assert(conf); if (scaler_conf_eq(scaler->conf, *conf) && scaler->scale_factor == scale_factor && scaler->initialized) @@ -1687,9 +1701,29 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, uninit_scaler(p, scaler); + if (scaler->index == SCALER_DSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + if (scaler->index == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + struct filter_kernel bare_window; const struct filter_kernel 
*t_kernel = mp_find_filter_kernel(conf->kernel.name); const struct filter_window *t_window = mp_find_filter_window(conf->window.name); bool is_tscale = scaler->index == SCALER_TSCALE; + if (!t_kernel) { + const struct filter_window *window = mp_find_filter_window(conf->kernel.name); + if (window) { + bare_window = (struct filter_kernel) { .f = *window }; + t_kernel = &bare_window; + } + } scaler->conf = *conf; scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); @@ -1731,8 +1765,6 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, scaler->kernel->f.radius = conf->radius; scaler->kernel->clamp = conf->clamp; - scaler->kernel->value_cutoff = conf->cutoff; - scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor); int size = scaler->kernel->size; @@ -1744,17 +1776,16 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, int stride = width * num_components; assert(size <= stride); - scaler->lut_size = 1 << p->opts.scaler_lut_size; - - float *weights = talloc_array(NULL, float, scaler->lut_size * stride); - mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights); + static const int lut_size = 256; + float *weights = talloc_array(NULL, float, lut_size * stride); + mp_compute_lut(scaler->kernel, lut_size, stride, weights); bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D); struct ra_tex_params lut_params = { .dimensions = use_1d ? 1 : 2, - .w = use_1d ? scaler->lut_size : width, - .h = use_1d ? 1 : scaler->lut_size, + .w = use_1d ? lut_size : width, + .h = use_1d ? 1 : lut_size, .d = 1, .format = fmt, .render_src = true, @@ -1884,8 +1915,7 @@ static void pass_sample(struct gl_video *p, struct image img, } else if (scaler->kernel) { pass_sample_separated(p, img, scaler, w, h); } else { - // Should never happen - abort(); + MP_ASSERT_UNREACHABLE(); // should never happen } // Apply any required multipliers. 
Separated scaling already does this in @@ -1916,7 +1946,7 @@ static void deband_hook(struct gl_video *p, struct image img, { pass_describe(p, "debanding (%s)", plane_names[img.type]); pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, - p->image_params.color.gamma); + p->image_params.color.transfer); } static void unsharp_hook(struct gl_video *p, struct image img, @@ -2007,25 +2037,23 @@ static void user_hook(struct gl_video *p, struct image img, gl_transform_trans(shader->offset, trans); } -static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) +static bool add_user_hook(void *priv, const struct gl_user_shader_hook *hook) { struct gl_video *p = priv; - struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy); - *copy = hook; - + struct gl_user_shader_hook *copy = talloc_dup(p, (struct gl_user_shader_hook *)hook); struct tex_hook texhook = { - .save_tex = bstrdup0(copy, hook.save_tex), - .components = hook.components, - .align_offset = hook.align_offset, + .save_tex = bstrdup0(copy, copy->save_tex), + .components = copy->components, + .align_offset = copy->align_offset, .hook = user_hook, .cond = user_hook_cond, .priv = copy, }; for (int h = 0; h < SHADER_MAX_HOOKS; h++) - texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]); + texhook.hook_tex[h] = bstrdup0(copy, copy->hook_tex[h]); for (int h = 0; h < SHADER_MAX_BINDS; h++) - texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); + texhook.bind_tex[h] = bstrdup0(copy, copy->bind_tex[h]); MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook); return true; @@ -2174,6 +2202,23 @@ static void pass_read_video(struct gl_video *p) } } + // If chroma textures are in a subsampled semi-planar format and rotated, + // introduce an explicit conversion pass to avoid breaking chroma scalers. 
+ for (int n = 0; n < 4; n++) { + if (img[n].tex && img[n].type == PLANE_CHROMA && + img[n].tex->params.format->num_components == 2 && + p->image_params.rotate % 180 == 90 && + p->ra_format.chroma_w != 1) + { + GLSLF("// chroma fix for rotated plane %d\n", n); + copy_image(p, &(int){0}, img[n]); + pass_describe(p, "chroma fix for rotated plane"); + finish_pass_tex(p, &p->chroma_tex[n], img[n].w, img[n].h); + img[n] = image_wrap(p->chroma_tex[n], img[n].type, + img[n].components); + } + } + // At this point all planes are finalized but they may not be at the // required size yet. Furthermore, they may have texture offsets that // require realignment. @@ -2249,6 +2294,13 @@ static void pass_read_video(struct gl_video *p) continue; const struct scaler_config *conf = &p->opts.scaler[scaler_id]; + + if (scaler_id == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + struct scaler *scaler = &p->scaler[scaler_id]; // bilinear scaling is a free no-op thanks to GPU sampling @@ -2288,6 +2340,7 @@ static void pass_convert_yuv(struct gl_video *p) struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; cparams.gray = p->is_gray; + cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT; mp_csp_set_image_params(&cparams, &p->image_params); mp_csp_equalizer_state_get(p->video_eq, &cparams); p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma); @@ -2298,22 +2351,29 @@ static void pass_convert_yuv(struct gl_video *p) GLSLF("color = color.%s;\n", p->color_swizzle); // Pre-colormatrix input gamma correction - if (cparams.color.space == MP_CSP_XYZ) - GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light + if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) + pass_linearize(p->sc, p->image_params.color.transfer); // We always explicitly normalize the range in pass_read_video cparams.input_bits = cparams.texture_bits = 0; // Conversion to RGB. For RGB itself, this still applies e.g. 
brightness // and contrast controls, or expansion of e.g. LSB-packed 10 bit data. - struct mp_cmat m = {{{0}}}; + struct pl_transform3x3 m = {0}; mp_get_csp_matrix(&cparams, &m); - gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]); + gl_sc_uniform_mat3(sc, "colormatrix", true, &m.mat.m[0][0]); gl_sc_uniform_vec3(sc, "colormatrix_c", m.c); GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) - if (p->image_params.color.space == MP_CSP_BT_2020_C) { + if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) { + pass_delinearize(p->sc, p->image_params.color.transfer); + // mp_get_csp_matrix implicitly converts XYZ to DCI-P3 + p->image_params.repr.sys = PL_COLOR_SYSTEM_RGB; + p->image_params.color.primaries = PL_COLOR_PRIM_DCI_P3; + } + + if (p->image_params.repr.sys == PL_COLOR_SYSTEM_BT_2020_C) { // Conversion for C'rcY'cC'bc via the BT.2020 CL system: // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 // = (B'-Y'c) / 1.5816 | C'bc > 0 @@ -2324,34 +2384,39 @@ static void pass_convert_yuv(struct gl_video *p) // as per the BT.2020 specification, table 4. This is a non-linear // transformation because (constant) luminance receives non-equal // contributions from the three different channels. - GLSLF("// constant luminance conversion\n"); - GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), - vec2(1.9404, 1.7184), - lessThanEqual(color.br, vec2(0))) - + color.gg;) + GLSLF("// constant luminance conversion \n" + "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" + " vec2(1.9404, 1.7184), \n" + " %s(lessThanEqual(color.br, vec2(0))))\n" + " + color.gg; \n", + gl_sc_bvec(p->sc, 2)); // Expand channels to camera-linear light. This shader currently just // assumes everything uses the BT. |