1 files changed, 34 insertions, 17 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 46d9026742..13e5b06918 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -373,8 +373,9 @@ const struct m_sub_options gl_video_conf = {
         SCALER_OPTS("tscale", SCALER_TSCALE),
         OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
         OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
-        OPT_FLAG("linear-scaling", linear_scaling, 0),
         OPT_FLAG("correct-downscaling", correct_downscaling, 0),
+        OPT_FLAG("linear-downscaling", linear_downscaling, 0),
+        OPT_FLAG("linear-upscaling", linear_upscaling, 0),
         OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
         OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
         OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
@@ -423,6 +424,8 @@ const struct m_sub_options gl_video_conf = {
         OPT_REPLACED("opengl-fbo-format", "fbo-format"),
         OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"),
         OPT_REPLACED("opengl-gamma", "gamma-factor"),
+        OPT_REMOVED("linear-scaling", "Split into --linear-upscaling and "
+                    "--linear-downscaling"),
         {0}
     },
     .size = sizeof(struct gl_video_opts),
@@ -1103,8 +1106,14 @@ static void cleanup_binds(struct gl_video *p)
 
 // Sets the appropriate compute shader metadata for an implicit compute pass
 // bw/bh: block size
-static void pass_is_compute(struct gl_video *p, int bw, int bh)
+static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
 {
+    if (p->pass_compute.active && flexible) {
+        // Avoid overwriting existing block sizes when using a flexible pass
+        bw = p->pass_compute.block_w;
+        bh = p->pass_compute.block_h;
+    }
+
     p->pass_compute = (struct compute_info){
         .active = true,
         .block_w = bw,
@@ -1248,7 +1257,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
     // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
     // over fragment shaders wherever possible.
     if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
-        pass_is_compute(p, 16, 16);
+        pass_is_compute(p, 16, 16, true);
 
     if (p->pass_compute.active) {
         gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
@@ -1744,7 +1753,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
     if (shmem_req > p->ra->max_shmem)
         goto fallback;
 
-    pass_is_compute(p, bw, bh);
+    pass_is_compute(p, bw, bh, false);
     pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
     return;
 
@@ -2326,13 +2335,18 @@ static void pass_scale_main(struct gl_video *p)
 
     // Pre-conversion, like linear light/sigmoidization
     GLSLF("// scaler pre-conversion\n");
-    bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
+    bool use_linear = false;
+    if (downscaling) {
+        use_linear = p->opts.linear_downscaling;
 
-    // Linear light downscaling results in nasty artifacts for HDR curves due
-    // to the potentially extreme brightness differences severely compounding
-    // any ringing. So just scale in gamma light instead.
-    if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling)
-        use_linear = false;
+        // Linear light downscaling results in nasty artifacts for HDR curves
+        // due to the potentially extreme brightness differences severely
+        // compounding any ringing. So just scale in gamma light instead.
+        if (mp_trc_is_hdr(p->image_params.color.gamma))
+            use_linear = false;
+    } else if (upscaling) {
+        use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling;
+    }
 
     if (use_linear) {
         p->use_linear = true;
@@ -2485,7 +2499,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
 
     if (detect_peak) {
         pass_describe(p, "detect HDR peak");
-        pass_is_compute(p, 8, 8); // 8x8 is good for performance
+        pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
         gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
             "uint counter;"
             "uint frame_idx;"
@@ -3488,9 +3502,9 @@ static bool check_dumb_mode(struct gl_video *p)
         return false;
 
     // otherwise, use auto-detection
-    if (o->target_prim || o->target_trc || o->linear_scaling ||
-        o->correct_downscaling || o->sigmoid_upscaling || o->interpolation ||
-        o->blend_subs || o->deband || o->unsharp)
+    if (o->target_prim || o->target_trc || o->correct_downscaling ||
+        o->linear_downscaling || o->linear_upscaling || o->sigmoid_upscaling ||
+        o->interpolation || o->blend_subs || o->deband || o->unsharp)
         return false;
     // check remaining scalers (tscale is already implicitly excluded above)
     for (int i = 0; i < SCALER_COUNT; i++) {
@@ -3519,7 +3533,7 @@ static void check_gl_features(struct gl_video *p)
     bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
     bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;
 
-    const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf",
+    const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16",
                                    "rgb10_a2", "rgba8", 0};
     const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
     const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
@@ -3646,8 +3660,11 @@ static void check_gl_features(struct gl_video *p)
                   p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d;
 
     // mix() is needed for some gamma functions
-    if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) {
-        p->opts.linear_scaling = false;
+    if (!have_mglsl && (p->opts.linear_downscaling ||
+                        p->opts.linear_upscaling || p->opts.sigmoid_upscaling))
+    {
+        p->opts.linear_downscaling = false;
+        p->opts.linear_upscaling = false;
         p->opts.sigmoid_upscaling = false;
         MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
     }