1 files changed, 1294 insertions, 819 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 8807b65005..f46fdc1c9f 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -17,6 +17,7 @@
 
 #include <assert.h>
 #include <math.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <string.h>
 #include <assert.h>
@@ -31,24 +32,22 @@
 #include "common/global.h"
 #include "options/options.h"
 #include "common.h"
+#include "formats.h"
 #include "utils.h"
 #include "hwdec.h"
 #include "osd.h"
 #include "stream/stream.h"
-#include "superxbr.h"
-#include "nnedi3.h"
 #include "video_shaders.h"
+#include "user_shaders.h"
 #include "video/out/filter_kernels.h"
 #include "video/out/aspect.h"
 #include "video/out/bitmap_packer.h"
 #include "video/out/dither.h"
 #include "video/out/vo.h"
 
-// Maximal number of passes that prescaler can be applied.
-#define MAX_PRESCALE_PASSES 5
-
-// Maximal number of steps each pass of prescaling contains
-#define MAX_PRESCALE_STEPS 2
+// Maximal number of saved textures (for user script purposes)
+#define MAX_TEXTURE_HOOKS 16
+#define MAX_SAVED_TEXTURES 32
 
 // scale/cscale arguments that map directly to shader filter routines.
 // Note that the convolution filters are not included in this list.
@@ -91,6 +90,7 @@ static const struct gl_vao_entry vertex_vao[] = {
 
 struct texplane {
     int w, h;
+    int tex_w, tex_h;
     GLint gl_internal_format;
     GLenum gl_target;
     bool use_integer;
@@ -98,12 +98,14 @@ struct texplane {
     GLenum gl_type;
     GLuint gl_texture;
     int gl_buffer;
+    char swizzle[5];
 };
 
 struct video_image {
     struct texplane planes[4];
     bool image_flipped;
     struct mp_image *mpi;       // original input image
+    bool hwdec_mapped;
 };
 
 enum plane_type {
@@ -125,10 +127,29 @@ struct img_tex {
     GLenum gl_target;
     bool use_integer;
     int tex_w, tex_h; // source texture size
-    int w, h; // logical size (with pre_transform applied)
-    struct gl_transform pre_transform; // source texture space
+    int w, h; // logical size (after transformation)
     struct gl_transform transform; // rendering transformation
-    bool texture_la; // it's a GL_LUMINANCE_ALPHA texture (access with .ra not .rg)
+    char swizzle[5];
+};
+
+// A named img_tex, for user scripting purposes
+struct saved_tex {
+    const char *name;
+    struct img_tex tex;
+};
+
+// A texture hook. This is some operation that transforms a named texture as
+// soon as it's generated
+struct tex_hook {
+    char *hook_tex;
+    char *save_tex;
+    char *bind_tex[TEXUNIT_VIDEO_NUM];
+    int components; // how many components are relevant (0 = same as input)
+    void *priv; // this can be set to whatever the hook wants
+    void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL
+                 struct gl_transform *trans, void *priv);
+    void (*free)(struct tex_hook *hook);
+    bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv);
 };
 
 struct fbosurface {
@@ -140,7 +161,7 @@ struct fbosurface {
 
 struct cached_file {
     char *path;
-    char *body;
+    struct bstr body;
 };
 
 struct gl_video {
@@ -149,15 +170,15 @@ struct gl_video {
     struct mpv_global *global;
     struct mp_log *log;
     struct gl_video_opts opts;
+    struct gl_video_opts *opts_alloc;
     struct gl_lcms *cms;
     bool gl_debug;
 
     int texture_16bit_depth;    // actual bits available in 16 bit textures
+    int fb_depth;               // actual bits available in GL main framebuffer
 
     struct gl_shader_cache *sc;
 
-    GLenum gl_target; // texture target (GL_TEXTURE_2D, ...) for video and FBOs
-
     struct gl_vao vao;
 
     struct osd_state *osd_state;
@@ -170,7 +191,9 @@ struct gl_video {
     GLuint dither_texture;
     int dither_size;
 
-    GLuint nnedi3_weights_buffer;
+    struct gl_timer *upload_timer;
+    struct gl_timer *render_timer;
+    struct gl_timer *present_timer;
 
     struct mp_image_params real_image_params;   // configured format
     struct mp_image_params image_params;        // texture format (mind hwdec case)
@@ -188,21 +211,13 @@ struct gl_video {
     bool forced_dumb_mode;
 
     struct fbotex merge_fbo[4];
-    struct fbotex deband_fbo[4];
     struct fbotex scale_fbo[4];
     struct fbotex integer_fbo[4];
     struct fbotex indirect_fbo;
     struct fbotex blend_subs_fbo;
-    struct fbotex unsharp_fbo;
     struct fbotex output_fbo;
     struct fbosurface surfaces[FBOSURFACES_MAX];
-
-    // these are duplicated so we can keep rendering back and forth between
-    // them to support an unlimited number of shader passes per step
-    struct fbotex pre_fbo[2];
-    struct fbotex post_fbo[2];
-
-    struct fbotex prescale_fbo[MAX_PRESCALE_PASSES][MAX_PRESCALE_STEPS];
+    struct fbotex vdpau_deinterleave_fbo[2];
 
     int surface_idx;
     int surface_now;
@@ -229,6 +244,14 @@ struct gl_video {
     bool use_linear;
     float user_gamma;
 
+    // hooks and saved textures
+    struct saved_tex saved_tex[MAX_SAVED_TEXTURES];
+    int saved_tex_num;
+    struct tex_hook tex_hooks[MAX_TEXTURE_HOOKS];
+    int tex_hook_num;
+    struct fbotex hook_fbos[MAX_SAVED_TEXTURES];
+    int hook_fbo_num;
+
     int frames_uploaded;
     int frames_rendered;
     AVLFG lfg;
@@ -237,7 +260,7 @@ struct gl_video {
     int last_dither_matrix_size;
     float *last_dither_matrix;
 
-    struct cached_file files[10];
+    struct cached_file *files;
     int num_files;
 
     struct gl_hwdec *hwdec;
@@ -245,89 +268,7 @@ struct gl_video {
 
     bool dsi_warned;
     bool custom_shader_fn_warned;
-};
-
-struct fmt_entry {
-    int mp_format;
-    GLint internal_format;
-    GLenum format;
-    GLenum type;
-};
-
-// Very special formats, for which OpenGL happens to have direct support
-static const struct fmt_entry mp_to_gl_formats[] = {
-    {IMGFMT_RGB565,  GL_RGB,   GL_RGB,  GL_UNSIGNED_SHORT_5_6_5},
-    {0},
-};
-
-static const struct fmt_entry gl_byte_formats[] = {
-    {0, GL_RED,     GL_RED,     GL_UNSIGNED_BYTE},      // 1 x 8
-    {0, GL_RG,      GL_RG,      GL_UNSIGNED_BYTE},      // 2 x 8
-    {0, GL_RGB,     GL_RGB,     GL_UNSIGNED_BYTE},      // 3 x 8
-    {0, GL_RGBA,    GL_RGBA,    GL_UNSIGNED_BYTE},      // 4 x 8
-    {0, GL_R16,     GL_RED,     GL_UNSIGNED_SHORT},     // 1 x 16
-    {0, GL_RG16,    GL_RG,      GL_UNSIGNED_SHORT},     // 2 x 16
-    {0, GL_RGB16,   GL_RGB,     GL_UNSIGNED_SHORT},     // 3 x 16
-    {0, GL_RGBA16,  GL_RGBA,    GL_UNSIGNED_SHORT},     // 4 x 16
-};
-
-static const struct fmt_entry gl_byte_formats_gles3[] = {
-    {0, GL_R8,       GL_RED,    GL_UNSIGNED_BYTE},      // 1 x 8
-    {0, GL_RG8,      GL_RG,     GL_UNSIGNED_BYTE},      // 2 x 8
-    {0, GL_RGB8,     GL_RGB,    GL_UNSIGNED_BYTE},      // 3 x 8
-    {0, GL_RGBA8,    GL_RGBA,   GL_UNSIGNED_BYTE},      // 4 x 8
-    // There are no filterable texture formats that can be uploaded as
-    // GL_UNSIGNED_SHORT, so apparently we're out of luck.
-    {0, 0,           0,         0},                     // 1 x 16
-    {0, 0,           0,         0},                     // 2 x 16
-    {0, 0,           0,         0},                     // 3 x 16
-    {0, 0,           0,         0},                     // 4 x 16
-};
-
-static const struct fmt_entry gl_ui_byte_formats_gles3[] = {
-    {0, GL_R8UI,      GL_RED_INTEGER,   GL_UNSIGNED_BYTE},  // 1 x 8
-    {0, GL_RG8UI,     GL_RG_INTEGER,    GL_UNSIGNED_BYTE},  // 2 x 8
-    {0, GL_RGB8UI,    GL_RGB_INTEGER,   GL_UNSIGNED_BYTE},  // 3 x 8
-    {0, GL_RGBA8UI,   GL_RGBA_INTEGER,  GL_UNSIGNED_BYTE},  // 4 x 8
-    {0, GL_R16UI,     GL_RED_INTEGER,   GL_UNSIGNED_SHORT}, // 1 x 16
-    {0, GL_RG16UI,    GL_RG_INTEGER,    GL_UNSIGNED_SHORT}, // 2 x 16
-    {0, GL_RGB16UI,   GL_RGB_INTEGER,   GL_UNSIGNED_SHORT}, // 3 x 16
-    {0, GL_RGBA16UI,  GL_RGBA_INTEGER,  GL_UNSIGNED_SHORT}, // 4 x 16
-};
-
-static const struct fmt_entry gl_byte_formats_gles2[] = {
-    {0, GL_LUMINANCE,           GL_LUMINANCE,       GL_UNSIGNED_BYTE}, // 1 x 8
-    {0, GL_LUMINANCE_ALPHA,     GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
-    {0, GL_RGB,                 GL_RGB,             GL_UNSIGNED_BYTE}, // 3 x 8
-    {0, GL_RGBA,                GL_RGBA,            GL_UNSIGNED_BYTE}, // 4 x 8
-    {0, 0,                      0,                  0},                // 1 x 16
-    {0, 0,                      0,                  0},                // 2 x 16
-    {0, 0,                      0,                  0},                // 3 x 16
-    {0, 0,                      0,                  0},                // 4 x 16
-};
-
-static const struct fmt_entry gl_byte_formats_legacy[] = {
-    {0, GL_LUMINANCE,           GL_LUMINANCE,       GL_UNSIGNED_BYTE}, // 1 x 8
-    {0, GL_LUMINANCE_ALPHA,     GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
-    {0, GL_RGB,                 GL_RGB,             GL_UNSIGNED_BYTE}, // 3 x 8
-    {0, GL_RGBA,                GL_RGBA,            GL_UNSIGNED_BYTE}, // 4 x 8
-    {0, GL_LUMINANCE16,         GL_LUMINANCE,       GL_UNSIGNED_SHORT},// 1 x 16
-    {0, GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, GL_UNSIGNED_SHORT},// 2 x 16
-    {0, GL_RGB16,               GL_RGB,             GL_UNSIGNED_SHORT},// 3 x 16
-    {0, GL_RGBA16,              GL_RGBA,            GL_UNSIGNED_SHORT},// 4 x 16
-};
-
-static const struct fmt_entry gl_float16_formats[] = {
-    {0, GL_R16F,    GL_RED,     GL_FLOAT},              // 1 x f
-    {0, GL_RG16F,   GL_RG,      GL_FLOAT},              // 2 x f
-    {0, GL_RGB16F,  GL_RGB,     GL_FLOAT},              // 3 x f
-    {0, GL_RGBA16F, GL_RGBA,    GL_FLOAT},              // 4 x f
-};
-
-static const struct fmt_entry gl_apple_formats[] = {
-    {IMGFMT_UYVY, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE},
-    {IMGFMT_YUYV, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_REV_APPLE},
-    {0}
+    bool broken_frame; // temporary error state
 };
 
 struct packed_fmt_entry {
@@ -359,6 +300,7 @@ static const struct packed_fmt_entry mp_packed_formats[] = {
 };
 
 const struct gl_video_opts gl_video_opts_def = {
+    .dither_algo = DITHER_FRUIT,
     .dither_depth = -1,
     .dither_size = 6,
     .temporal_dither_period = 1,
@@ -375,14 +317,16 @@ const struct gl_video_opts gl_video_opts_def = {
     .scaler_resizes_only = 1,
     .scaler_lut_size = 6,
     .interpolation_threshold = 0.0001,
-    .alpha_mode = 3,
+    .alpha_mode = ALPHA_BLEND_TILES,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
-    .prescale_passes = 1,
-    .prescale_downscaling_threshold = 2.0f,
+    .target_brightness = 250,
+    .hdr_tone_mapping = TONE_MAPPING_HABLE,
+    .tone_mapping_param = NAN,
 };
 
 const struct gl_video_opts gl_video_opts_hq_def = {
+    .dither_algo = DITHER_FRUIT,
     .dither_depth = 0,
     .dither_size = 6,
     .temporal_dither_period = 1,
@@ -401,13 +345,13 @@ const struct gl_video_opts gl_video_opts_hq_def = {
     .scaler_resizes_only = 1,
     .scaler_lut_size = 6,
     .interpolation_threshold = 0.0001,
-    .alpha_mode = 3,
+    .alpha_mode = ALPHA_BLEND_TILES,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
-    .blend_subs = 0,
     .deband = 1,
-    .prescale_passes = 1,
-    .prescale_downscaling_threshold = 2.0f,
+    .target_brightness = 250,
+    .hdr_tone_mapping = TONE_MAPPING_HABLE,
+    .tone_mapping_param = NAN,
 };
 
 static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
@@ -436,6 +380,14 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLAG("gamma-auto", gamma_auto, 0),
         OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
         OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
+        OPT_INTRANGE("target-brightness", target_brightness, 0, 1, 100000),
+        OPT_CHOICE("hdr-tone-mapping", hdr_tone_mapping, 0,
+                   ({"clip",     TONE_MAPPING_CLIP},
+                    {"reinhard", TONE_MAPPING_REINHARD},
+                    {"hable",    TONE_MAPPING_HABLE},
+                    {"gamma",    TONE_MAPPING_GAMMA},
+                    {"linear",   TONE_MAPPING_LINEAR})),
+        OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
         OPT_FLAG("pbo", pbo, 0),
         SCALER_OPTS("scale",  SCALER_SCALE),
         SCALER_OPTS("dscale", SCALER_DSCALE),
@@ -449,9 +401,7 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
         OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
         OPT_CHOICE("fbo-format", fbo_format, 0,
-                   ({"rgb",    GL_RGB},
-                    {"rgba",   GL_RGBA},
-                    {"rgb8",   GL_RGB8},
+                   ({"rgb8",   GL_RGB8},
                     {"rgba8",  GL_RGBA8},
                     {"rgb10",  GL_RGB10},
                     {"rgb10_a2", GL_RGB10_A2},
@@ -466,42 +416,33 @@ const struct m_sub_options gl_video_conf = {
         OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
                           ({"no", -1}, {"auto", 0})),
         OPT_CHOICE("dither", dither_algo, 0,
-                   ({"fruit", 0}, {"ordered", 1}, {"no", -1})),
+                   ({"fruit", DITHER_FRUIT},
+                    {"ordered", DITHER_ORDERED},
+                    {"no", DITHER_NONE})),
         OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
         OPT_FLAG("temporal-dither", temporal_dither, 0),
         OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128),
         OPT_CHOICE("alpha", alpha_mode, 0,
-                   ({"no", 0},
-                    {"yes", 1},
-                    {"blend", 2},
-                    {"blend-tiles", 3})),
+                   ({"no", ALPHA_NO},
+                    {"yes", ALPHA_YES},
+                    {"blend", ALPHA_BLEND},
+                    {"blend-tiles", ALPHA_BLEND_TILES})),
         OPT_FLAG("rectangle-textures", use_rectangle, 0),
         OPT_COLOR("background", background, 0),
         OPT_FLAG("interpolation", interpolation, 0),
         OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0),
         OPT_CHOICE("blend-subtitles", blend_subs, 0,
-                   ({"no", 0},
-                    {"yes", 1},
-                    {"video", 2})),
+                   ({"no", BLEND_SUBS_NO},
+                    {"yes", BLEND_SUBS_YES},
+                    {"video", BLEND_SUBS_VIDEO})),
         OPT_STRING("scale-shader", scale_shader, 0),
         OPT_STRINGLIST("pre-shaders", pre_shaders, 0),
         OPT_STRINGLIST("post-shaders", post_shaders, 0),
+        OPT_STRINGLIST("user-shaders", user_shaders, 0),
         OPT_FLAG("deband", deband, 0),
         OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
         OPT_FLOAT("sharpen", unsharp, 0),
-        OPT_CHOICE("prescale-luma", prescale_luma, 0,
-                   ({"none", 0},
-                    {"superxbr", 1}
-#if HAVE_NNEDI
-                    , {"nnedi3", 2}
-#endif
-                    )),
-        OPT_INTRANGE("prescale-passes",
-                     prescale_passes, 0, 1, MAX_PRESCALE_PASSES),
-        OPT_FLOATRANGE("prescale-downscaling-threshold",
-                       prescale_downscaling_threshold, 0, 0.0, 32.0),
-        OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0),
-        OPT_SUBSTRUCT("nnedi3", nnedi3_opts, nnedi3_conf, 0),
+        OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0),
 
         OPT_REMOVED("approx-gamma", "this is always enabled now"),
         OPT_REMOVED("cscale-down", "chroma is never downscaled"),
@@ -509,6 +450,7 @@ const struct m_sub_options gl_video_conf = {
         OPT_REMOVED("indirect", "this is set automatically whenever sane"),
         OPT_REMOVED("srgb", "use target-prim=bt709:target-trc=srgb instead"),
         OPT_REMOVED("source-shader", "use :deband to enable debanding"),
+        OPT_REMOVED("prescale-luma", "use user shaders for prescaling"),
 
         OPT_REPLACED("lscale", "scale"),
         OPT_REPLACED("lscale-down", "scale-down"),
@@ -524,7 +466,6 @@ const struct m_sub_options gl_video_conf = {
         OPT_REPLACED("smoothmotion-threshold", "tscale-param1"),
         OPT_REPLACED("scale-down", "dscale"),
         OPT_REPLACED("fancy-downscaling", "correct-downscaling"),
-        OPT_REPLACED("prescale", "prescale-luma"),
 
         {0}
     },
@@ -535,78 +476,44 @@ const struct m_sub_options gl_video_conf = {
 static void uninit_rendering(struct gl_video *p);
 static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
 static void check_gl_features(struct gl_video *p);
-static bool init_format(int fmt, struct gl_video *init);
-static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
-static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src);
+static bool init_format(struct gl_video *p, int fmt, bool test_only);
+static void init_image_desc(struct gl_video *p, int fmt);
+static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
+static void set_options(struct gl_video *p, struct gl_video_opts *src);
+static const char *handle_scaler_opt(const char *name, bool tscale);
+static void reinit_from_options(struct gl_video *p);
 static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
+static void gl_video_setup_hooks(struct gl_video *p);
 
 #define GLSL(x) gl_sc_add(p->sc, #x "\n");
 #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
 #define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
 
-// Return a fixed point texture format with given characteristics.
-static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
-                                               int n_channels)
-{
-    assert(bytes_per_comp == 1 || bytes_per_comp == 2);
-    assert(n_channels >= 1 && n_channels <= 4);
-    const struct fmt_entry *fmts = gl_byte_formats;
-    if (gl->es >= 300) {
-        fmts = gl_byte_formats_gles3;
-    } else if (gl->es) {
-        fmts = gl_byte_formats_gles2;
-    } else if (!(gl->mpgl_caps & MPGL_CAP_TEX_RG)) {
-        fmts = gl_byte_formats_legacy;
-    }
-    return &fmts[n_channels - 1 + (bytes_per_comp - 1) * 4];
-}
-
-static bool is_integer_format(const struct fmt_entry *fmt)
-{
-    // Tests only the formats which we actually declare somewhere.
-    switch (fmt->format) {
-    case GL_RED_INTEGER:
-    case GL_RG_INTEGER:
-    case GL_RGB_INTEGER:
-    case GL_RGBA_INTEGER:
-        return true;
-    }
-    return false;
-}
-
-static const char *load_cached_file(struct gl_video *p, const char *path)
+static struct bstr load_cached_file(struct gl_video *p, const char *path)
 {
     if (!path || !path[0])
-        return NULL;
+        return (struct bstr){0};
     for (int n = 0; n < p->num_files; n++) {
         if (strcmp(p->files[n].path, path) == 0)
             return p->files[n].body;
     }
     // not found -> load it
-    if (p->num_files == MP_ARRAY_SIZE(p->files)) {
-        // empty cache when it overflows
-        for (int n = 0; n < p->num_files; n++) {
-            talloc_free(p->files[n].path);
-            talloc_free(p->files[n].body);
-        }
-        p->num_files = 0;
-    }
-    struct bstr s = stream_read_file(path, p, p->global, 100000); // 100 kB
+    struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB
     if (s.len) {
-        struct cached_file *new = &p->files[p->num_files++];
-        *new = (struct cached_file) {
+        struct cached_file new = {
             .path = talloc_strdup(p, path),
-            .body = s.start
+            .body = s,
         };
-        return new->body;
+        MP_TARRAY_APPEND(p, p->files, p->num_files, new);
+        return new.body;
     }
-    return NULL;
+    return (struct bstr){0};
 }
 
 static void debug_check_gl(struct gl_video *p, const char *msg)
 {
     if (p->gl_debug)
-        glCheckError(p->gl, p->log, msg);
+        gl_check_error(p->gl, p->log, msg);
 }
 
 void gl_video_set_debug(struct gl_video *p, bool enable)
@@ -628,13 +535,23 @@ static void gl_video_reset_surfaces(struct gl_video *p)
     p->output_fbo_valid = false;
 }
 
+static void gl_video_reset_hooks(struct gl_video *p)
+{
+    for (int i = 0; i < p->tex_hook_num; i++) {
+        if (p->tex_hooks[i].free)
+            p->tex_hooks[i].free(&p->tex_hooks[i]);
+    }
+
+    p->tex_hook_num = 0;
+}
+
 static inline int fbosurface_wrap(int id)
 {
     id = id % FBOSURFACES_MAX;
     return id < 0 ? id + FBOSURFACES_MAX : id;
 }
 
-static void recreate_osd(struct gl_video *p)
+static void reinit_osd(struct gl_video *p)
 {
     mpgl_osd_destroy(p->osd);
     p->osd = NULL;
@@ -644,17 +561,6 @@ static void recreate_osd(struct gl_video *p)
     }
 }
 
-static void reinit_rendering(struct gl_video *p)
-{
-    MP_VERBOSE(p, "Reinit rendering.\n");
-
-    debug_check_gl(p, "before scaler initialization");
-
-    uninit_rendering(p);
-
-    recreate_osd(p);
-}
-
 static void uninit_rendering(struct gl_video *p)
 {
     GL *gl = p->gl;
@@ -665,45 +571,41 @@ static void uninit_rendering(struct gl_video *p)
     gl->DeleteTextures(1, &p->dither_texture);
     p->dither_texture = 0;
 
-    gl->DeleteBuffers(1, &p->nnedi3_weights_buffer);
-    p->nnedi3_weights_buffer = 0;
-
     for (int n = 0; n < 4; n++) {
         fbotex_uninit(&p->merge_fbo[n]);
-        fbotex_uninit(&p->deband_fbo[n]);
         fbotex_uninit(&p->scale_fbo[n]);
         fbotex_uninit(&p->integer_fbo[n]);
     }
 
     fbotex_uninit(&p->indirect_fbo);
     fbotex_uninit(&p->blend_subs_fbo);
-    fbotex_uninit(&p->unsharp_fbo);
-
-    for (int n = 0; n < 2; n++) {
-        fbotex_uninit(&p->pre_fbo[n]);
-        fbotex_uninit(&p->post_fbo[n]);
-    }
-
-    for (int pass = 0; pass < MAX_PRESCALE_PASSES; pass++) {
-        for (int step = 0; step < MAX_PRESCALE_STEPS; step++)
-            fbotex_uninit(&p->prescale_fbo[pass][step]);
-    }
 
     for (int n = 0; n < FBOSURFACES_MAX; n++)
         fbotex_uninit(&p->surfaces[n].fbotex);
 
+    for (int n = 0; n < MAX_SAVED_TEXTURES; n++)
+        fbotex_uninit(&p->hook_fbos[n]);
+
+    for (int n = 0; n < 2; n++)
+        fbotex_uninit(&p->vdpau_deinterleave_fbo[n]);
+
     gl_video_reset_surfaces(p);
+    gl_video_reset_hooks(p);
+
+    gl_sc_reset_error(p->sc);
 }
 
-void gl_video_update_profile(struct gl_video *p)
+// Warning: profile.start must point to a ta allocation, and the function
+//          takes over ownership.
+void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
 {
-    if (p->use_lut_3d)
-        return;
-
-    p->use_lut_3d = true;
-    check_gl_features(p);
+    if (gl_lcms_set_memory_profile(p->cms, icc_data))
+        reinit_from_options(p);
+}
 
-    reinit_rendering(p);
+bool gl_video_icc_auto_enabled(struct gl_video *p)
+{
+    return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
 }
 
 static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
@@ -711,14 +613,15 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
 {
     GL *gl = p->gl;
 
-    if (!p->cms || !p->use_lut_3d)
+    if (!p->use_lut_3d)
         return false;
 
-    if (!gl_lcms_has_changed(p->cms, prim, trc))
+    if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc))
         return true;
 
     struct lut3d *lut3d = NULL;
     if (!gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc) || !lut3d) {
+        p->use_lut_3d = false;
         return false;
     }
 
@@ -738,12 +641,14 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
 
     debug_check_gl(p, "after 3d lut creation");
 
+    talloc_free(lut3d);
+
     return true;
 }
 
 // Fill an img_tex struct from an FBO + some metadata
-static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t,
-                                  enum plane_type type, int components)
+static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type,
+                                  int components)
 {
     assert(type != PLANE_NONE);
     return (struct img_tex){
@@ -756,8 +661,7 @@ static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t,
         .tex_h = fbo->rh,
         .w = fbo->lw,
         .h = fbo->lh,
-        .pre_transform = identity_trans,
-        .transform = t,
+        .transform = identity_trans,
         .components = components,
     };
 }
@@ -797,18 +701,19 @@ static void get_plane_source_transform(struct gl_video *p, int w, int h,
 }
 
 // Places a video_image's image textures + associated metadata into tex[]. The
-// number of textures is equal to p->plane_count.
+// number of textures is equal to p->plane_count. Any necessary plane offsets
+// are stored in off. (e.g. chroma position)
 static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
-                             struct img_tex tex[4])
+                             struct img_tex tex[4], struct gl_transform off[4])
 {
     assert(vimg->mpi);
 
     // Determine the chroma offset
-    struct gl_transform chroma = (struct gl_transform){{{0}}};
-
     float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
     float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
 
+    struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};
+
     if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
         int cx, cy;
         mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
@@ -821,11 +726,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
         chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
     }
 
-    // Make sure luma/chroma sizes are aligned.
-    // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2
-    // so luma (3,3) has to align with chroma (2,2).
-    chroma.m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w;
-    chroma.m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h;
+    // FIXME: account for rotation in the chroma offset
 
     // The existing code assumes we just have a single tex multiplier for
     // all of the planes. This may change in the future
@@ -856,17 +757,18 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
             .gl_target = t->gl_target,
             .multiplier = tex_mul,
             .use_integer = t->use_integer,
-            .tex_w = t->w,
-            .tex_h = t->h,
+            .tex_w = t->tex_w,
+            .tex_h = t->tex_h,
             .w = t->w,
             .h = t->h,
-            .transform = type == PLANE_CHROMA ? chroma : identity_trans,
             .components = p->image_desc.components[n],
-            .texture_la = t->gl_format == GL_LUMINANCE_ALPHA,
         };
-        get_plane_source_transform(p, t->w, t->h, &tex[n].pre_transform);
+        snprintf(tex[n].swizzle, sizeof(tex[n].swizzle), "%s", t->swizzle);
+        get_plane_source_transform(p, t->w, t->h, &tex[n].transform);
         if (p->image_params.rotate % 180 == 90)
             MPSWAP(int, tex[n].w, tex[n].h);
+
+        off[n] = type == PLANE_CHROMA ? chroma : identity_trans;
     }
 }
 
@@ -874,19 +776,21 @@ static void init_video(struct gl_video *p)
 {
     GL *gl = p->gl;
 
-    init_format(p->image_params.imgfmt, p);
-    p->gl_target = p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D;
-
-    check_gl_features(p);
-
-    if (p->hwdec_active) {
+    if (p->hwdec && p->hwdec->driver->imgfmt == p->image_params.imgfmt) {
         if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0)
             MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
-        init_format(p->image_params.imgfmt, p);
-        p->image_params.imgfmt = p->image_desc.id;
-        p->gl_target = p->hwdec->gl_texture_target;
+        init_image_desc(p, p->image_params.imgfmt);
+        const char **exts = p->hwdec->glsl_extensions;
+        for (int n = 0; exts && exts[n]; n++)
+            gl_sc_enable_extension(p->sc, (char *)exts[n]);
+        p->hwdec_active = true;
+    } else {
+        init_format(p, p->image_params.imgfmt, false);
     }
 
+    // Format-dependent checks.
+    check_gl_features(p);
+
     mp_image_params_guess_csp(&p->image_params);
 
     int eq_caps = MP_CSP_EQ_CAPS_GAMMA;
@@ -900,42 +804,65 @@ static void init_video(struct gl_video *p)
 
     debug_check_gl(p, "before video texture creation");
 
-    struct video_image *vimg = &p->image;
+    if (!p->hwdec_active) {
+        struct video_image *vimg = &p->image;
 
-    struct mp_image layout = {0};
-    mp_image_set_params(&layout, &p->image_params);
+        GLenum gl_target =
+            p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D;
 
-    for (int n = 0; n < p->plane_count; n++) {
-        struct texplane *plane = &vimg->planes[n];
+        struct mp_image layout = {0};
+        mp_image_set_params(&layout, &p->image_params);
 
-        plane->gl_target = p->gl_target;
+        for (int n = 0; n < p->plane_count; n++) {
+            struct texplane *plane = &vimg->planes[n];
 
-        plane->w = mp_image_plane_w(&layout, n);
-        plane->h = mp_image_plane_h(&layout, n);
+            plane->gl_target = gl_target;
+
+            plane->w = plane->tex_w = mp_image_plane_w(&layout, n);
+            plane->h = plane->tex_h = mp_image_plane_h(&layout, n);
 
-        if (!p->hwdec_active) {
             gl->ActiveTexture(GL_TEXTURE0 + n);
             gl->GenTextures(1, &plane->gl_texture);
-            gl->BindTexture(p->gl_target, plane->gl_texture);
+            gl->BindTexture(gl_target, plane->gl_texture);
 
-            gl->TexImage2D(p->gl_target, 0, plane->gl_internal_format,
+            gl->TexImage2D(gl_target, 0, plane->gl_internal_format,
                            plane->w, plane->h, 0,
                            plane->gl_format, plane->gl_type, NULL);
 
             int filter = plane->use_integer ? GL_NEAREST : GL_LINEAR;
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, filter);
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, filter);
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        }
+            gl->TexParameteri(gl_target, GL_TEXTURE_MIN_FILTER, filter);
+            gl->TexParameteri(gl_target, GL_TEXTURE_MAG_FILTER, filter);
+            gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+            gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 
-        MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, plane->w, plane->h);
+            MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, plane->w, plane->h);
+        }
+        gl->ActiveTexture(GL_TEXTURE0);
     }
-    gl->ActiveTexture(GL_TEXTURE0);
 
     debug_check_gl(p, "after video texture creation");
 
-    reinit_rendering(p);
+    gl_video_setup_hooks(p);
+}
+
+// Release any texture mappings associated with the current frame.
+static void unmap_current_image(struct gl_video *p)
+{
+    struct video_image *vimg = &p->image;
+
+    if (vimg->hwdec_mapped) {
+        assert(p->hwdec_active);
+        if (p->hwdec->driver->unmap)
+            p->hwdec->driver->unmap(p->hwdec);
+        memset(vimg->planes, 0, sizeof(vimg->planes));
+        vimg->hwdec_mapped = false;
+    }
+}
+
+static void unref_current_image(struct gl_video *p)
+{
+    unmap_current_image(p);
+    mp_image_unrefp(&p->image.mpi);
 }
 
 static void uninit_video(struct gl_video *p)
@@ -946,21 +873,21 @@ static void uninit_video(struct gl_video *p)
 
     struct video_image *vimg = &p->image;
 
+    unref_current_image(p);
+
     for (int n = 0; n < p->plane_count; n++) {
         struct texplane *plane = &vimg->planes[n];
 
-        if (!p->hwdec_active)
-            gl->DeleteTextures(1, &plane->gl_texture);
-        plane->gl_texture = 0;
+        gl->DeleteTextures(1, &plane->gl_texture);
         gl->DeleteBuffers(1, &plane->gl_buffer);
-        plane->gl_buffer = 0;
     }
-    mp_image_unrefp(&vimg->mpi);
+    *vimg = (struct video_image){0};
 
     // Invalidate image_params to ensure that gl_video_config() will call
     // init_video() on uninitialized gl_video.
     p->real_image_params = (struct mp_image_params){0};
     p->image_params = p->real_image_params;
+    p->hwdec_active = false;
 }
 
 static void pass_prepare_src_tex(struct gl_video *p)
@@ -975,9 +902,11 @@ static void pass_prepare_src_tex(struct gl_video *p)
 
         char texture_name[32];
         char texture_size[32];
+        char texture_rot[32];
         char pixel_size[32];
         snprintf(texture_name, sizeof(texture_name), "texture%d", n);
         snprintf(texture_size, sizeof(texture_size), "texture_size%d", n);
+        snprintf(texture_rot, sizeof(texture_rot), "texture_rot%d", n);
         snprintf(pixel_size, sizeof(pixel_size), "pixel_size%d", n);
 
         if (s->use_integer) {
@@ -991,6 +920,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
             f[1] = s->tex_h;
         }
         gl_sc_uniform_vec2(sc, texture_size, f);
+        gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
         gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
                                                        1.0f / f[1]});
 
@@ -1022,7 +952,6 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
             if (!s->gl_tex)
                 continue;
             struct gl_transform tr = s->transform;
-            gl_transform_trans(s->pre_transform, &tr);
             float tx = (n / 2) * s->w;
             float ty = (n % 2) * s->h;
             gl_transform_vec(tr, &tx, &ty);
@@ -1038,7 +967,6 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
     debug_check_gl(p, "after rendering");
 }
 
-// flags: see render_pass_quad
 static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h,
                                const struct mp_rect *dst)
 {
@@ -1067,6 +995,34 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
                        &(struct mp_rect){0, 0, w, h});
 }
 
+// Copy a texture to the vec4 color, while increasing offset. Also applies
+// the texture multiplier to the sampled color
+static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img)
+{
+    int count = img.components;
+    assert(*offset + count <= 4);
+
+    int id = pass_bind(p, img);
+    char src[5] = {0};
+    char dst[5] = {0};
+    const char *tex_fmt = img.swizzle[0] ? img.swizzle : "rgba";
+    const char *dst_fmt = "rgba";
+    for (int i = 0; i < count; i++) {
+        src[i] = tex_fmt[i];
+        dst[i] = dst_fmt[*offset + i];
+    }
+
+    if (img.use_integer) {
+        uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
+        img.multiplier *= 1.0 / (tex_max - 1);
+    }
+
+    GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
+          dst, img.multiplier, id, id, src);
+
+    *offset += count;
+}
+
 static void skip_unused(struct gl_video *p, int num_components)
 {
     for (int i = num_components; i < 4; i++)
@@ -1083,9 +1039,202 @@ static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
     scaler->initialized = false;
 }
 
-static void load_shader(struct gl_video *p, const char *body)
+static void hook_prelude(struct gl_video *p, const char *name, int id,
+                         struct img_tex tex)
 {
-    gl_sc_hadd(p->sc, body);
+    GLSLHF("#define %s_raw texture%d\n", name, id);
+    GLSLHF("#define %s_pos texcoord%d\n", name, id);
+    GLSLHF("#define %s_size texture_size%d\n", name, id);
+    GLSLHF("#define %s_rot texture_rot%d\n", name, id);
+    GLSLHF("#define %s_pt pixel_size%d\n", name, id);
+
+    // Set up the sampling functions
+    GLSLHF("#define %s_tex(pos) (%f * vec4(texture(%s_raw, pos)).%s)\n",
+           name, tex.multiplier, name, tex.swizzle[0] ? tex.swizzle : "rgba");