1 file changed, 574 insertions, 420 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 88a8557cc2..1478ec4687 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -29,6 +29,7 @@
 
 #include "misc/bstr.h"
 #include "options/m_config.h"
+#include "options/path.h"
 #include "common/global.h"
 #include "options/options.h"
 #include "utils.h"
@@ -182,6 +183,7 @@ struct gl_video {
 
     struct mp_image_params real_image_params;   // configured format
     struct mp_image_params image_params;        // texture format (mind hwdec case)
+    struct mp_image_params target_params;       // target format
     struct ra_imgfmt_desc ra_format;            // texture format
     int plane_count;
 
@@ -211,6 +213,7 @@ struct gl_video {
     struct ra_tex *merge_tex[4];
     struct ra_tex *scale_tex[4];
     struct ra_tex *integer_tex[4];
+    struct ra_tex *chroma_tex[4];
     struct ra_tex *indirect_tex;
     struct ra_tex *blend_subs_tex;
     struct ra_tex *error_diffusion_tex[2];
@@ -280,10 +283,7 @@ struct gl_video {
     struct cached_file *files;
     int num_files;
 
-    bool hwdec_interop_loading_done;
-    struct ra_hwdec **hwdecs;
-    int num_hwdecs;
-
+    struct ra_hwdec_ctx hwdec_ctx;
     struct ra_hwdec_mapper *hwdec_mapper;
     struct ra_hwdec *hwdec_overlay;
     bool hwdec_active;
@@ -292,11 +292,11 @@ struct gl_video {
     bool broken_frame; // temporary error state
 
     bool colorspace_override_warned;
+    bool correct_downscaling_warned;
 };
 
 static const struct gl_video_opts gl_video_opts_def = {
     .dither_algo = DITHER_FRUIT,
-    .dither_depth = -1,
     .dither_size = 6,
     .temporal_dither_period = 1,
     .error_diffusion = "sierra-lite",
@@ -304,164 +304,182 @@ static const struct gl_video_opts gl_video_opts_def = {
     .sigmoid_center = 0.75,
     .sigmoid_slope = 6.5,
     .scaler = {
-        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
-         .cutoff = 0.001}, // scale
-        {{NULL,       .params={NAN, NAN}}, {.params = {NAN, NAN}},
-         .cutoff = 0.001}, // dscale
-        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
-         .cutoff = 0.001}, // cscale
-        {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
-         .clamp = 1, }, // tscale
+        {{"lanczos", .params={NAN, NAN}}, {.params = {NAN, NAN}}},    // scale
+        {{"hermite", .params={NAN, NAN}}, {.params = {NAN, NAN}}},    // dscale
+        {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}},         // cscale (unset: reinit_scaler() uses the scale config)
+        {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale
     },
-    .scaler_resizes_only = 1,
-    .scaler_lut_size = 6,
-    .interpolation_threshold = 0.0001,
-    .alpha_mode = ALPHA_BLEND_TILES,
-    .background = {0, 0, 0, 255},
+    .scaler_resizes_only = true,
+    .correct_downscaling = true,
+    .linear_downscaling = true,
+    .sigmoid_upscaling = true,
+    .interpolation_threshold = 0.01,
+    .background = BACKGROUND_TILES,
+    .background_color = {0, 0, 0, 255},
     .gamma = 1.0f,
     .tone_map = {
-        .curve = TONE_MAPPING_HABLE,
+        .curve = TONE_MAPPING_AUTO,
         .curve_param = NAN,
         .max_boost = 1.0,
-        .decay_rate = 100.0,
-        .scene_threshold_low = 5.5,
-        .scene_threshold_high = 10.0,
-        .desat = 0.75,
-        .desat_exp = 1.5,
+        .decay_rate = 20.0,
+        .scene_threshold_low = 1.0,
+        .scene_threshold_high = 3.0,
+        .contrast_smoothness = 3.5,
     },
     .early_flush = -1,
+    .shader_cache = true,
     .hwdec_interop = "auto",
 };
 
-static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
-                               struct bstr name, struct bstr param);
-
-static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
-                               struct bstr name, struct bstr param);
-
-static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt,
-                                        struct bstr name, struct bstr param);
+static OPT_STRING_VALIDATE_FUNC(validate_scaler_opt);
+static OPT_STRING_VALIDATE_FUNC(validate_window_opt);
+static OPT_STRING_VALIDATE_FUNC(validate_error_diffusion_opt);
 
 #define OPT_BASE_STRUCT struct gl_video_opts
 
 // Use for options which use NAN for defaults.
-#define OPT_FLOATDEF(name, var, flags) \
-    OPT_FLOAT(name, var, (flags) | M_OPT_DEFAULT_NAN)
+#define OPT_FLOATDEF(field) \
+    OPT_FLOAT(field), \
+    .flags = M_OPT_DEFAULT_NAN
 
 #define SCALER_OPTS(n, i) \
-    OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \
-    OPT_FLOATDEF(n"-param1", scaler[i].kernel.params[0], 0),               \
-    OPT_FLOATDEF(n"-param2", scaler[i].kernel.params[1], 0),               \
-    OPT_FLOAT(n"-blur",   scaler[i].kernel.blur, 0),                       \
-    OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0),             \
-    OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0),        \
-    OPT_FLOATDEF(n"-wparam", scaler[i].window.params[0], 0),               \
-    OPT_FLOAT(n"-wblur",  scaler[i].window.blur, 0),                       \
-    OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0),       \
-    OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0),               \
-    OPT_FLOATRANGE(n"-radius",    scaler[i].radius, 0, 0.5, 16.0),         \
-    OPT_FLOATRANGE(n"-antiring",  scaler[i].antiring, 0, 0.0, 1.0),        \
-    OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt)
+    {n, OPT_STRING_VALIDATE(scaler[i].kernel.name, validate_scaler_opt)},  \
+    {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])},                \
+    {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])},                \
+    {n"-blur",   OPT_FLOAT(scaler[i].kernel.blur)},                        \
+    {n"-cutoff", OPT_REMOVED("Hard-coded as 0.001")},                      \
+    {n"-taper",  OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)},    \
+    {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])},                \
+    {n"-wblur",  OPT_REMOVED("Just adjust filter radius directly")},       \
+    {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)},    \
+    {n"-clamp",  OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)},           \
+    {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)},         \
+    {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)},      \
+    {n"-window", OPT_STRING_VALIDATE(scaler[i].window.name, validate_window_opt)}
 
 const struct m_sub_options gl_video_conf = {
     .opts = (const m_option_t[]) {
-        OPT_CHOICE("gpu-dumb-mode", dumb_mode, 0,
-                   ({"auto", 0}, {"yes", 1}, {"no", -1})),
-        OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0),
-        OPT_FLAG("gamma-auto", gamma_auto, 0),
-        OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
-        OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
-        OPT_CHOICE_OR_INT("target-peak", target_peak, 0, 10, 10000,
-                          ({"auto", 0})),
-        OPT_CHOICE("tone-mapping", tone_map.curve, 0,
-                   ({"clip",     TONE_MAPPING_CLIP},
-                    {"mobius",   TONE_MAPPING_MOBIUS},
-                    {"reinhard", TONE_MAPPING_REINHARD},
-                    {"hable",    TONE_MAPPING_HABLE},
-                    {"gamma",    TONE_MAPPING_GAMMA},
-                    {"linear",   TONE_MAPPING_LINEAR})),
-        OPT_CHOICE("hdr-compute-peak", tone_map.compute_peak, 0,
-                   ({"auto", 0},
-                    {"yes", 1},
-                    {"no", -1})),
-        OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
-        OPT_FLOATRANGE("hdr-scene-threshold-low",
-                       tone_map.scene_threshold_low, 0, 0, 20.0),
-        OPT_FLOATRANGE("hdr-scene-threshold-high",
-                       tone_map.scene_threshold_high, 0, 0, 20.0),
-        OPT_FLOATDEF("tone-mapping-param", tone_map.curve_param, 0),
-        OPT_FLOATRANGE("tone-mapping-max-boost", tone_map.max_boost, 0, 1.0, 10.0),
-        OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
-        OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
-                       tone_map.desat_exp, 0, 0.0, 20.0),
-        OPT_FLAG("gamut-warning", tone_map.gamut_warning, 0),
-        OPT_FLAG("opengl-pbo", pbo, 0),
+        {"gpu-dumb-mode", OPT_CHOICE(dumb_mode,
+            {"auto", 0}, {"yes", 1}, {"no", -1})},
+        {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0),
+            .deprecation_message = "no replacement"},
+        {"gamma-auto", OPT_BOOL(gamma_auto),
+            .deprecation_message = "no replacement"},
+        {"target-prim", OPT_CHOICE_C(target_prim, pl_csp_prim_names)},
+        {"target-trc", OPT_CHOICE_C(target_trc, pl_csp_trc_names)},
+        {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}),
+            M_RANGE(10, 10000)},
+        {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}),
+            M_RANGE(10, 1000000)},
+        {"target-gamut", OPT_CHOICE_C(target_gamut, pl_csp_prim_names)},
+        {"tone-mapping", OPT_CHOICE(tone_map.curve,
+            {"auto",     TONE_MAPPING_AUTO},
+            {"clip",     TONE_MAPPING_CLIP},
+            {"mobius",   TONE_MAPPING_MOBIUS},
+            {"reinhard", TONE_MAPPING_REINHARD},
+            {"hable",    TONE_MAPPING_HABLE},
+            {"gamma",    TONE_MAPPING_GAMMA},
+            {"linear",   TONE_MAPPING_LINEAR},
+            {"spline",   TONE_MAPPING_SPLINE},
+            {"bt.2390",  TONE_MAPPING_BT_2390},
+            {"bt.2446a", TONE_MAPPING_BT_2446A},
+            {"st2094-40", TONE_MAPPING_ST2094_40},
+            {"st2094-10", TONE_MAPPING_ST2094_10})},
+        {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)},
+        {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)},
+        {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost),
+            M_RANGE(1.0, 10.0)},
+        {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)},
+        {"gamut-mapping-mode", OPT_CHOICE(tone_map.gamut_mode,
+            {"auto",        GAMUT_AUTO},
+            {"clip",        GAMUT_CLIP},
+            {"perceptual",  GAMUT_PERCEPTUAL},
+            {"relative",    GAMUT_RELATIVE},
+            {"saturation",  GAMUT_SATURATION},
+            {"absolute",    GAMUT_ABSOLUTE},
+            {"desaturate",  GAMUT_DESATURATE},
+            {"darken",      GAMUT_DARKEN},
+            {"warn",        GAMUT_WARN},
+            {"linear",      GAMUT_LINEAR})},
+        {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak,
+            {"auto", 0},
+            {"yes", 1},
+            {"no", -1})},
+        {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile),
+            M_RANGE(0.0, 100.0)},
+        {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate),
+            M_RANGE(0.0, 1000.0)},
+        {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low),
+            M_RANGE(0, 20.0)},
+        {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high),
+            M_RANGE(0, 20.0)},
+        {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery),
+            M_RANGE(0, 2.0)},
+        {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness),
+            M_RANGE(1.0, 100.0)},
+        {"opengl-pbo", OPT_BOOL(pbo)},
         SCALER_OPTS("scale",  SCALER_SCALE),
         SCALER_OPTS("dscale", SCALER_DSCALE),
         SCALER_OPTS("cscale", SCALER_CSCALE),
         SCALER_OPTS("tscale", SCALER_TSCALE),
-        OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
-        OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
-        OPT_FLAG("correct-downscaling", correct_downscaling, 0),
-        OPT_FLAG("linear-downscaling", linear_downscaling, 0),
-        OPT_FLAG("linear-upscaling", linear_upscaling, 0),
-        OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
-        OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
-        OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
-        OPT_STRING("fbo-format", fbo_format, 0),
-        OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
-                          ({"no", -1}, {"auto", 0})),
-        OPT_CHOICE("dither", dither_algo, 0,
-                   ({"fruit", DITHER_FRUIT},
-                    {"ordered", DITHER_ORDERED},
-                    {"error-diffusion", DITHER_ERROR_DIFFUSION},
-                    {"no", DITHER_NONE})),
-        OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
-        OPT_FLAG("temporal-dither", temporal_dither, 0),
-        OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128),
-        OPT_STRING_VALIDATE("error-diffusion", error_diffusion, 0,
-                            validate_error_diffusion_opt),
-        OPT_CHOICE("alpha", alpha_mode, 0,
-                   ({"no", ALPHA_NO},
-                    {"yes", ALPHA_YES},
-                    {"blend", ALPHA_BLEND},
-                    {"blend-tiles", ALPHA_BLEND_TILES})),
-        OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0),
-        OPT_COLOR("background", background, 0),
-        OPT_FLAG("interpolation", interpolation, 0),
-        OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0),
-        OPT_CHOICE("blend-subtitles", blend_subs, 0,
-                   ({"no", BLEND_SUBS_NO},
-                    {"yes", BLEND_SUBS_YES},
-                    {"video", BLEND_SUBS_VIDEO})),
-        OPT_PATHLIST("glsl-shaders", user_shaders, M_OPT_FILE),
-        OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"),
-        OPT_FLAG("deband", deband, 0),
-        OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
-        OPT_FLOAT("sharpen", unsharp, 0),
-        OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096),
-        OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096),
-        OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0),
-        OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, M_OPT_FILE),
-        OPT_STRING_VALIDATE("gpu-hwdec-interop", hwdec_interop, 0,
-                             ra_hwdec_validate_opt),
-        OPT_REPLACED("opengl-hwdec-interop", "gpu-hwdec-interop"),
-        OPT_REPLACED("hwdec-preload", "opengl-hwdec-interop"),
-        OPT_REPLACED("hdr-tone-mapping", "tone-mapping"),
-        OPT_REPLACED("opengl-shaders", "glsl-shaders"),
-        OPT_REPLACED("opengl-shader", "glsl-shader"),
-        OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"),
-        OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"),
-        OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"),
-        OPT_REPLACED("opengl-fbo-format", "fbo-format"),
-        OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"),
-        OPT_REPLACED("opengl-gamma", "gamma-factor"),
-        OPT_REMOVED("linear-scaling", "Split into --linear-upscaling and "
-                    "--linear-downscaling"),
+        {"scaler-lut-size", OPT_REMOVED("hard-coded as 8")},
+        {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)},
+        {"correct-downscaling", OPT_BOOL(correct_downscaling)},
+        {"linear-downscaling", OPT_BOOL(linear_downscaling)},
+        {"linear-upscaling", OPT_BOOL(linear_upscaling)},
+        {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)},
+        {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)},
+        {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)},
+        {"fbo-format", OPT_STRING(fbo_format)},
+        {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}),
+            M_RANGE(-1, 16)},
+        {"dither", OPT_CHOICE(dither_algo,
+            {"fruit", DITHER_FRUIT},
+            {"ordered", DITHER_ORDERED},
+            {"error-diffusion", DITHER_ERROR_DIFFUSION},
+            {"no", DITHER_NONE})},
+        {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)},
+        {"temporal-dither", OPT_BOOL(temporal_dither)},
+        {"temporal-dither-period", OPT_INT(temporal_dither_period),
+            M_RANGE(1, 128)},
+        {"error-diffusion",
+            OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)},
+        {"background", OPT_CHOICE(background,
+            {"none", BACKGROUND_NONE},
+            {"color", BACKGROUND_COLOR},
+            {"tiles", BACKGROUND_TILES})},
+        {"opengl-rectangle-textures", OPT_BOOL(use_rectangle)},
+        {"background-color", OPT_COLOR(background_color)},
+        {"interpolation", OPT_BOOL(interpolation)},
+        {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)},
+        {"blend-subtitles", OPT_CHOICE(blend_subs,
+            {"no", BLEND_SUBS_NO},
+            {"yes", BLEND_SUBS_YES},
+            {"video", BLEND_SUBS_VIDEO})},
+        {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE},
+        {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")},
+        {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)},
+        {"deband", OPT_BOOL(deband)},
+        {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)},
+        {"sharpen", OPT_FLOAT(unsharp)},
+        {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)},
+        {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)},
+        {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)},
+        {"gpu-shader-cache", OPT_BOOL(shader_cache)},
+        {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE},
+        {"gpu-hwdec-interop",
+            OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)},
+        {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")},
+        {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")},
+        {"tone-mapping-desaturate", OPT_REMOVED("no replacement")}, // used to point at --tone-mapping-mode, which is itself removed
+        {"tone-mapping-desaturate-exponent", OPT_REMOVED("no replacement")}, // same: its former replacement no longer exists
+        {"tone-mapping-crosstalk", OPT_REMOVED("Hard-coded as 0.04")},
+        {"tone-mapping-mode", OPT_REMOVED("no replacement")},
         {0}
     },
     .size = sizeof(struct gl_video_opts),
     .defaults = &gl_video_opts_def,
+    .change_flags = UPDATE_VIDEO,
 };
 
 static void uninit_rendering(struct gl_video *p);
@@ -488,7 +506,9 @@ static struct bstr load_cached_file(struct gl_video *p, const char *path)
             return p->files[n].body;
     }
     // not found -> load it
-    struct bstr s = stream_read_file(path, p, p->global, 1000000000); // 1GB
+    char *fname = mp_get_user_path(NULL, p->global, path);
+    struct bstr s = stream_read_file(fname, p, p->global, 1000000000); // 1GB
+    talloc_free(fname);
     if (s.len) {
         struct cached_file new = {
             .path = talloc_strdup(p, path),
@@ -555,6 +575,7 @@ static void uninit_rendering(struct gl_video *p)
         ra_tex_free(p->ra, &p->merge_tex[n]);
         ra_tex_free(p->ra, &p->scale_tex[n]);
         ra_tex_free(p->ra, &p->integer_tex[n]);
+        ra_tex_free(p->ra, &p->chroma_tex[n]);
     }
 
     ra_tex_free(p->ra, &p->indirect_tex);
@@ -582,15 +603,6 @@ bool gl_video_gamma_auto_enabled(struct gl_video *p)
     return p->opts.gamma_auto;
 }
 
-struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
-{
-    return (struct mp_colorspace) {
-        .primaries = p->opts.target_prim,
-        .gamma = p->opts.target_trc,
-        .sig_peak = p->opts.target_peak / MP_REF_WHITE,
-    };
-}
-
 // Warning: profile.start must point to a ta allocation, and the function
 //          takes over ownership.
 void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
@@ -604,8 +616,8 @@ bool gl_video_icc_auto_enabled(struct gl_video *p)
     return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
 }
 
-static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
-                               enum mp_csp_trc trc)
+static bool gl_video_get_lut3d(struct gl_video *p, enum pl_color_primaries prim,
+                               enum pl_color_transfer trc)
 {
     if (!p->use_lut_3d)
         return false;
@@ -653,6 +665,11 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
 
     talloc_free(lut3d);
 
+    if (!p->lut_3d_texture) {
+        p->use_lut_3d = false;
+        return false;
+    }
+
     return true;
 }
 
@@ -743,16 +760,16 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg,
 
     struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};
 
-    if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
-        int cx, cy;
-        mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
+    if (p->image_params.chroma_location != PL_CHROMA_CENTER) {
+        float cx, cy;
+        pl_chroma_location_offset(p->image_params.chroma_location, &cx, &cy);
         // By default texture coordinates are such that chroma is centered with
         // any chroma subsampling. If a specific direction is given, make it
         // so that the luma and chroma sample line up exactly.
         // For 4:4:4, setting chroma location should have no effect at all.
         // luma sample size (in chroma coord. space)
-        chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
-        chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+        chroma.t[0] = ls_w < 1 ? ls_w * -cx : 0;
+        chroma.t[1] = ls_h < 1 ? ls_h * -cy : 0;
     }
 
     memset(img, 0, 4 * sizeof(img[0]));
@@ -768,9 +785,9 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg,
                 ctype = PLANE_NONE;
             } else if (c == 4) {
                 ctype = PLANE_ALPHA;
-            } else if (p->image_params.color.space == MP_CSP_RGB) {
+            } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_RGB) {
                 ctype = PLANE_RGB;
-            } else if (p->image_params.color.space == MP_CSP_XYZ) {
+            } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_XYZ) {
                 ctype = PLANE_XYZ;
             } else {
                 ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
@@ -782,9 +799,11 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg,
 
         int msb_valid_bits =
             p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
-        int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space;
+        int csp = type == PLANE_ALPHA ? PL_COLOR_SYSTEM_RGB : p->image_params.repr.sys;
         float tex_mul =
             1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits);
+        if (p->ra_format.component_type == RA_CTYPE_FLOAT)
+            tex_mul = 1.0;
 
         img[n] = (struct image){
             .type = type,
@@ -854,14 +873,7 @@ static void init_video(struct gl_video *p)
 {
     p->use_integer_conversion = false;
 
-    struct ra_hwdec *hwdec = NULL;
-    for (int n = 0; n < p->num_hwdecs; n++) {
-        if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) {
-            hwdec = p->hwdecs[n];
-            break;
-        }
-    }
-
+    struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt);
     if (hwdec) {
         if (hwdec->driver->overlay_frame) {
             MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
@@ -939,9 +951,6 @@ static void init_video(struct gl_video *p)
                        params.w, params.h);
 
             plane->tex = ra_tex_create(p->ra, &params);
-            if (!plane->tex)
-                abort(); // shit happens
-
             p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
         }
     }
@@ -1045,13 +1054,13 @@ static void uninit_video(struct gl_video *p)
     ra_hwdec_mapper_free(&p->hwdec_mapper);
 }
 
-static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
+static void pass_record(struct gl_video *p, const struct mp_pass_perf *perf)
 {
     if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX)
         return;
 
     struct pass_info *pass = &p->pass[p->pass_idx];
-    pass->perf = perf;
+    pass->perf = *perf;
 
     if (pass->desc.len == 0)
         bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
@@ -1178,16 +1187,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h,
         if (!s->tex)
             continue;
 
-        // We need to rescale the coordinates to the true texture size
-        char *tex_scale = mp_tprintf(32, "tex_scale%d", n);
-        gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){
-                (float)s->w / s->tex->params.w,
-                (float)s->h / s->tex->params.h,
-        });
-
-        PRELUDE("#define texmap%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n);
-        PRELUDE("#define texmap%d(id) (texture_rot%d * texmap%d_raw(id) + "
-               "pixel_size%d * texture_off%d)\n", n, n, n, n, n);
+        PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + "
+               "pixel_size%d * texture_off%d)\n", n, n, n, n);
         PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n);
     }
 
@@ -1199,12 +1200,13 @@ static void dispatch_compute(struct gl_video *p, int w, int h,
     if (!(p->ra->caps & RA_CAP_NUM_GROUPS))
         PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y);
 
-    pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1));
+    struct mp_pass_perf perf = gl_sc_dispatch_compute(p->sc, num_x, num_y, 1);
+    pass_record(p, &perf);
     cleanup_binds(p);
 }
 
 static struct mp_pass_perf render_pass_quad(struct gl_video *p,
-                                            struct ra_fbo fbo, bool discard,
+                                            const struct ra_fbo *fbo, bool discard,
                                             const struct mp_rect *dst)
 {
     // The first element is reserved for `vec2 position`
@@ -1262,15 +1264,16 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p,
             &p->tmp_vertex[num_vertex_attribs * 1],
             vertex_stride);
 
-    return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs,
+    return gl_sc_dispatch_draw(p->sc, fbo->tex, discard, p->vao, num_vertex_attribs,
                                vertex_stride, p->tmp_vertex, num_vertices);
 }
 
-static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo,
+static void finish_pass_fbo(struct gl_video *p, const struct ra_fbo *fbo,
                             bool discard, const struct mp_rect *dst)
 {
     pass_prepare_src_tex(p);
-    pass_record(p, render_pass_quad(p, fbo, discard, dst));
+    struct mp_pass_perf perf = render_pass_quad(p, fbo, discard, dst); // named local: pass_record() takes a pointer
+    pass_record(p, &perf);
     debug_check_gl(p, "after rendering");
     cleanup_binds(p);
 }
@@ -1290,8 +1293,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
 
     // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
     // over fragment shaders wherever possible.
-    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
+    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) &&
+        (*dst_tex)->params.storage_dst)
+    {
         pass_is_compute(p, 16, 16, true);
+    }
 
     if (p->pass_compute.active) {
         gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
@@ -1304,7 +1310,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
         debug_check_gl(p, "after dispatching compute shader");
     } else {
         struct ra_fbo fbo = { .tex = *dst_tex, };
-        finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h});
+        finish_pass_fbo(p, &fbo, true, &(struct mp_rect){0, 0, w, h});
     }
 }
 
@@ -1317,18 +1323,20 @@ static const char *get_tex_swizzle(struct image *img)
 
 // Copy a texture to the vec4 color, while increasing offset. Also applies
 // the texture multiplier to the sampled color
-static void copy_image(struct gl_video *p, int *offset, struct image img)
+static void copy_image(struct gl_video *p, unsigned int *offset, struct image img)
 {
-    int count = img.components;
-    assert(*offset + count <= 4);
-    assert(img.padding + count <= 4);
-
-    int id = pass_bind(p, img);
+    const unsigned int count = img.components;
     char src[5] = {0};
     char dst[5] = {0};
+
+    assert(*offset + count < sizeof(dst));
+    assert(img.padding + count < sizeof(src));
+
+    int id = pass_bind(p, img);
+
     const char *tex_fmt = get_tex_swizzle(&img);
     const char *dst_fmt = "rgba";
-    for (int i = 0; i < count; i++) {
+    for (unsigned int i = 0; i < count; i++) {
         src[i] = tex_fmt[img.padding + i];
         dst[i] = dst_fmt[*offset + i];
     }
@@ -1387,6 +1395,11 @@ static void hook_prelude(struct gl_video *p, const char *name, int id,
     GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n",
            name, name, name, crap);
 
+    if (p->ra->caps & RA_CAP_GATHER) {
+        GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4("
+               "textureGather(%s_raw, pos, c)))\n", name, name, name);
+    }
+
     // Since the extra matrix multiplication impacts performance,
     // skip it unless the texture was actually rotated
     if (gl_transform_eq(img.transform, identity_trans)) {
@@ -1680,6 +1693,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
                           double scale_factor,
                           int sizes[])
 {
+    assert(conf);
     if (scaler_conf_eq(scaler->conf, *conf) &&
         scaler->scale_factor == scale_factor &&
         scaler->initialized)
@@ -1687,9 +1701,29 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
 
     uninit_scaler(p, scaler);
 
+    if (scaler->index == SCALER_DSCALE && (!conf->kernel.name ||
+        !conf->kernel.name[0]))
+    {
+        conf = &p->opts.scaler[SCALER_SCALE];
+    }
+
+    if (scaler->index == SCALER_CSCALE && (!conf->kernel.name ||
+        !conf->kernel.name[0]))
+    {
+        conf = &p->opts.scaler[SCALER_SCALE];
+    }
+
+    struct filter_kernel bare_window;
     const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name);
     const struct filter_window *t_window = mp_find_filter_window(conf->window.name);
     bool is_tscale = scaler->index == SCALER_TSCALE;
+    if (!t_kernel) {
+        const struct filter_window *window = mp_find_filter_window(conf->kernel.name);
+        if (window) {
+            bare_window = (struct filter_kernel) { .f = *window };
+            t_kernel = &bare_window;
+        }
+    }
 
     scaler->conf = *conf;
     scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale);
@@ -1731,8 +1765,6 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
         scaler->kernel->f.radius = conf->radius;
 
     scaler->kernel->clamp = conf->clamp;
-    scaler->kernel->value_cutoff = conf->cutoff;
-
     scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
 
     int size = scaler->kernel->size;
@@ -1744,17 +1776,16 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
     int stride = width * num_components;
     assert(size <= stride);
 
-    scaler->lut_size = 1 << p->opts.scaler_lut_size;
-
-    float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
-    mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);
+    static const int lut_size = 256; // 1 << 8; fixed now that the scaler-lut-size option is removed
+    float *weights = talloc_array(NULL, float, lut_size * stride);
+    mp_compute_lut(scaler->kernel, lut_size, stride, weights);
 
     bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);
 
     struct ra_tex_params lut_params = {
         .dimensions = use_1d ? 1 : 2,
-        .w = use_1d ? scaler->lut_size : width,
-        .h = use_1d ? 1 : scaler->lut_size,
+        .w = use_1d ? lut_size : width,
+        .h = use_1d ? 1 : lut_size,
         .d = 1,
         .format = fmt,
         .render_src = true,
@@ -1884,8 +1915,7 @@ static void pass_sample(struct gl_video *p, struct image img,
     } else if (scaler->kernel) {
         pass_sample_separated(p, img, scaler, w, h);
     } else {
-        // Should never happen
-        abort();
+        MP_ASSERT_UNREACHABLE(); // should never happen
     }
 
     // Apply any required multipliers. Separated scaling already does this in
@@ -1916,7 +1946,7 @@ static void deband_hook(struct gl_video *p, struct image img,
 {
     pass_describe(p, "debanding (%s)", plane_names[img.type]);
     pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg,
-                       p->image_params.color.gamma);
+                       p->image_params.color.transfer);
 }
 
 static void unsharp_hook(struct gl_video *p, struct image img,
@@ -2007,25 +2037,23 @@ static void user_hook(struct gl_video *p, struct image img,
     gl_transform_trans(shader->offset, trans);
 }
 
-static bool add_user_hook(void *priv, struct gl_user_shader_hook hook)
+static bool add_user_hook(void *priv, const struct gl_user_shader_hook *hook)
 {
     struct gl_video *p = priv;
-    struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy);
-    *copy = hook;
-
+    struct gl_user_shader_hook *copy = talloc_dup(p, (struct gl_user_shader_hook *)hook);
     struct tex_hook texhook = {
-        .save_tex = bstrdup0(copy, hook.save_tex),
-        .components = hook.components,
-        .align_offset = hook.align_offset,
+        .save_tex = bstrdup0(copy, copy->save_tex),
+        .components = copy->components,
+        .align_offset = copy->align_offset,
         .hook = user_hook,
         .cond = user_hook_cond,
         .priv = copy,
     };
 
     for (int h = 0; h < SHADER_MAX_HOOKS; h++)
-        texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]);
+        texhook.hook_tex[h] = bstrdup0(copy, copy->hook_tex[h]);
     for (int h = 0; h < SHADER_MAX_BINDS; h++)
-        texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]);
+        texhook.bind_tex[h] = bstrdup0(copy, copy->bind_tex[h]);
 
     MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook);
     return true;
@@ -2174,6 +2202,23 @@ static void pass_read_video(struct gl_video *p)
         }
     }
 
+    // If chroma textures are in a subsampled semi-planar format and rotated,
+    // introduce an explicit conversion pass to avoid breaking chroma scalers.
+    for (int n = 0; n < 4; n++) {
+        if (img[n].tex && img[n].type == PLANE_CHROMA &&
+            img[n].tex->params.format->num_components == 2 &&
+            p->image_params.rotate % 180 == 90 &&
+            p->ra_format.chroma_w != 1)
+        {
+            GLSLF("// chroma fix for rotated plane %d\n", n);
+            copy_image(p, &(int){0}, img[n]);
+            pass_describe(p, "chroma fix for rotated plane");
+            finish_pass_tex(p, &p->chroma_tex[n], img[n].w, img[n].h);
+            img[n] = image_wrap(p->chroma_tex[n], img[n].type,
+                                img[n].components);
+        }
+    }
+
     // At this point all planes are finalized but they may not be at the
     // required size yet. Furthermore, they may have texture offsets that
     // require realignment.
@@ -2249,6 +2294,13 @@ static void pass_read_video(struct gl_video *p)
             continue;
 
         const struct scaler_config *conf = &p->opts.scaler[scaler_id];
+
+        if (scaler_id == SCALER_CSCALE && (!conf->kernel.name ||
+            !conf->kernel.name[0]))
+        {
+            conf = &p->opts.scaler[SCALER_SCALE];
+        }
+
         struct scaler *scaler = &p->scaler[scaler_id];
 
         // bilinear scaling is a free no-op thanks to GPU sampling
@@ -2288,6 +2340,7 @@ static void pass_convert_yuv(struct gl_video *p)
 
     struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
     cparams.gray = p->is_gray;
+    cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT;
     mp_csp_set_image_params(&cparams, &p->image_params);
     mp_csp_equalizer_state_get(p->video_eq, &cparams);
     p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);
@@ -2298,22 +2351,29 @@ static void pass_convert_yuv(struct gl_video *p)
         GLSLF("color = color.%s;\n", p->color_swizzle);
 
     // Pre-colormatrix input gamma correction
-    if (cparams.color.space == MP_CSP_XYZ)
-        GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light
+    if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ)
+        pass_linearize(p->sc, p->image_params.color.transfer);
 
     // We always explicitly normalize the range in pass_read_video
     cparams.input_bits = cparams.texture_bits = 0;
 
     // Conversion to RGB. For RGB itself, this still applies e.g. brightness
     // and contrast controls, or expansion of e.g. LSB-packed 10 bit data.
-    struct mp_cmat m = {{{0}}};
+    struct pl_transform3x3 m = {0};
     mp_get_csp_matrix(&cparams, &m);
-    gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]);
+    gl_sc_uniform_mat3(sc, "colormatrix", true, &m.mat.m[0][0]);
     gl_sc_uniform_vec3(sc, "colormatrix_c", m.c);
 
     GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;)
 
-    if (p->image_params.color.space == MP_CSP_BT_2020_C) {
+    if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) {
+        pass_delinearize(p->sc, p->image_params.color.transfer);
+        // mp_get_csp_matrix implicitly converts XYZ to DCI-P3
+        p->image_params.repr.sys = PL_COLOR_SYSTEM_RGB;
+        p->image_params.color.primaries = PL_COLOR_PRIM_DCI_P3;
+    }
+
+    if (p->image_params.repr.sys == PL_COLOR_SYSTEM_BT_2020_C) {
         // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
         // C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
         //      = (B'-Y'c) / 1.5816  | C'bc >  0
@@ -2324,34 +2384,39 @@ static void pass_convert_yuv(struct gl_video *p)
         // as per the BT.2020 specification, table 4. This is a non-linear
         // transformation because (constant) luminance receives non-equal
         // contributions from the three different channels.
-        GLSLF("// constant luminance conversion\n");
-        GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936),
-                                       vec2(1.9404, 1.7184),
-                                       lessThanEqual(color.br, vec2(0)))
-                        + color.gg;)
+        GLSLF("// constant luminance conversion \n"
+              "color.br = color.br * mix(vec2(1.5816, 0.9936),              \n"
+              "                         vec2(1.9404, 1.7184),               \n"
+              "                         %s(lessThanEqual(color.br, vec2(0))))\n"
+              "          + color.gg;                                        \n",
+              gl_sc_bvec(p->sc, 2));
         // Expand channels to camera-linear light. This shader currently just
         // assumes everything uses the BT.