summaryrefslogtreecommitdiffstats
path: root/video/out/gpu/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/gpu/video.c')
-rw-r--r--video/out/gpu/video.c3809
1 files changed, 3809 insertions, 0 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
new file mode 100644
index 0000000000..e36fde60e8
--- /dev/null
+++ b/video/out/gpu/video.c
@@ -0,0 +1,3809 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include <libavutil/common.h>
+#include <libavutil/lfg.h>
+
+#include "video.h"
+
+#include "misc/bstr.h"
+#include "options/m_config.h"
+#include "common/global.h"
+#include "options/options.h"
+#include "utils.h"
+#include "hwdec.h"
+#include "osd.h"
+#include "ra.h"
+#include "stream/stream.h"
+#include "video_shaders.h"
+#include "user_shaders.h"
+#include "video/out/filter_kernels.h"
+#include "video/out/aspect.h"
+#include "video/out/dither.h"
+#include "video/out/vo.h"
+
+// scale/cscale arguments that map directly to shader filter routines.
+// Note that the convolution filters are not included in this list.
+static const char *const fixed_scale_filters[] = {
+ "bilinear",
+ "bicubic_fast",
+ "oversample",
+ NULL
+};
+static const char *const fixed_tscale_filters[] = {
+ "oversample",
+ "linear",
+ NULL
+};
+
+// must be sorted, and terminated with 0
+int filter_sizes[] =
+ {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0};
+int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM
+
+struct vertex_pt {
+ float x, y;
+};
+
+struct vertex {
+ struct vertex_pt position;
+ struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM];
+};
+
+static const struct ra_renderpass_input vertex_vao[] = {
+ {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)},
+ {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])},
+ {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])},
+ {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])},
+ {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[3])},
+ {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])},
+ {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])},
+ {0}
+};
+
+struct texplane {
+ struct ra_tex *tex;
+ int w, h;
+ bool flipped;
+};
+
+struct video_image {
+ struct texplane planes[4];
+ struct mp_image *mpi; // original input image
+ uint64_t id; // unique ID identifying mpi contents
+ bool hwdec_mapped;
+};
+
+enum plane_type {
+ PLANE_NONE = 0,
+ PLANE_RGB,
+ PLANE_LUMA,
+ PLANE_CHROMA,
+ PLANE_ALPHA,
+ PLANE_XYZ,
+};
+
+static const char *plane_names[] = {
+ [PLANE_NONE] = "unknown",
+ [PLANE_RGB] = "rgb",
+ [PLANE_LUMA] = "luma",
+ [PLANE_CHROMA] = "chroma",
+ [PLANE_ALPHA] = "alpha",
+ [PLANE_XYZ] = "xyz",
+};
+
+// A self-contained description of a source image which can be bound to a
+// texture unit and sampled from. Contains metadata about how it's to be used
+struct img_tex {
+ enum plane_type type; // must be set to something non-zero
+ int components; // number of relevant coordinates
+ float multiplier; // multiplier to be used when sampling
+ struct ra_tex *tex;
+ int w, h; // logical size (after transformation)
+ struct gl_transform transform; // rendering transformation
+};
+
+// A named img_tex, for user scripting purposes
+struct saved_tex {
+ const char *name;
+ struct img_tex tex;
+};
+
+// A texture hook. This is some operation that transforms a named texture as
+// soon as it's generated
+struct tex_hook {
+ const char *save_tex;
+ const char *hook_tex[SHADER_MAX_HOOKS];
+ const char *bind_tex[TEXUNIT_VIDEO_NUM];
+ int components; // how many components are relevant (0 = same as input)
+ void *priv; // this gets talloc_freed when the tex_hook is removed
+ void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL
+ struct gl_transform *trans, void *priv);
+ bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv);
+};
+
+struct fbosurface {
+ struct fbotex fbotex;
+ uint64_t id;
+ double pts;
+};
+
+#define FBOSURFACES_MAX 10
+
+struct cached_file {
+ char *path;
+ struct bstr body;
+};
+
+struct pass_info {
+ struct bstr desc;
+ struct mp_pass_perf perf;
+};
+
+#define PASS_INFO_MAX (SHADER_MAX_PASSES + 32)
+
+struct dr_buffer {
+ struct ra_buf *buf;
+ // The mpi reference will keep the data from being recycled (or from other
+ // references gaining write access) while the GPU is accessing the buffer.
+ struct mp_image *mpi;
+};
+
+struct gl_video {
+ struct ra *ra;
+
+ struct mpv_global *global;
+ struct mp_log *log;
+ struct gl_video_opts opts;
+ struct m_config_cache *opts_cache;
+ struct gl_lcms *cms;
+
+ int fb_depth; // actual bits available in GL main framebuffer
+ struct m_color clear_color;
+ bool force_clear_color;
+
+ struct gl_shader_cache *sc;
+
+ struct osd_state *osd_state;
+ struct mpgl_osd *osd;
+ double osd_pts;
+
+ struct ra_tex *lut_3d_texture;
+ bool use_lut_3d;
+ int lut_3d_size[3];
+
+ struct ra_tex *dither_texture;
+
+ struct mp_image_params real_image_params; // configured format
+ struct mp_image_params image_params; // texture format (mind hwdec case)
+ struct ra_imgfmt_desc ra_format; // texture format
+ int plane_count;
+
+ bool is_gray;
+ bool has_alpha;
+ char color_swizzle[5];
+ bool use_integer_conversion;
+
+ struct video_image image;
+
+ struct dr_buffer *dr_buffers;
+ int num_dr_buffers;
+
+ bool using_dr_path;
+
+ bool dumb_mode;
+ bool forced_dumb_mode;
+
+ const struct ra_format *fbo_format;
+ struct fbotex merge_fbo[4];
+ struct fbotex scale_fbo[4];
+ struct fbotex integer_fbo[4];
+ struct fbotex indirect_fbo;
+ struct fbotex blend_subs_fbo;
+ struct fbotex screen_fbo;
+ struct fbotex output_fbo;
+ struct fbosurface surfaces[FBOSURFACES_MAX];
+ struct fbotex vdpau_deinterleave_fbo[2];
+ struct ra_buf *hdr_peak_ssbo;
+
+ // user pass descriptions and textures
+ struct tex_hook tex_hooks[SHADER_MAX_PASSES];
+ int tex_hook_num;
+ struct gl_user_shader_tex user_textures[SHADER_MAX_PASSES];
+ int user_tex_num;
+
+ int surface_idx;
+ int surface_now;
+ int frames_drawn;
+ bool is_interpolated;
+ bool output_fbo_valid;
+
+ // state for configured scalers
+ struct scaler scaler[SCALER_COUNT];
+
+ struct mp_csp_equalizer_state *video_eq;
+
+ struct mp_rect src_rect; // displayed part of the source video
+ struct mp_rect dst_rect; // video rectangle on output window
+ struct mp_osd_res osd_rect; // OSD size/margins
+
+ // temporary during rendering
+ struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ struct compute_info pass_compute; // compute shader metadata for this pass
+ int pass_tex_num;
+ int texture_w, texture_h;
+ struct gl_transform texture_offset; // texture transform without rotation
+ int components;
+ bool use_linear;
+ float user_gamma;
+
+ // pass info / metrics
+ struct pass_info pass_fresh[PASS_INFO_MAX];
+ struct pass_info pass_redraw[PASS_INFO_MAX];
+ struct pass_info *pass;
+ int pass_idx;
+ struct timer_pool *upload_timer;
+ struct timer_pool *blit_timer;
+ struct timer_pool *osd_timer;
+
+ // intermediate textures
+ struct saved_tex saved_tex[SHADER_MAX_SAVED];
+ int saved_tex_num;
+ struct fbotex hook_fbos[SHADER_MAX_SAVED];
+ int hook_fbo_num;
+
+ int frames_uploaded;
+ int frames_rendered;
+ AVLFG lfg;
+
+ // Cached because computing it can take relatively long
+ int last_dither_matrix_size;
+ float *last_dither_matrix;
+
+ struct cached_file *files;
+ int num_files;
+
+ struct ra_hwdec *hwdec;
+ struct ra_hwdec_mapper *hwdec_mapper;
+ bool hwdec_active;
+
+ bool dsi_warned;
+ bool broken_frame; // temporary error state
+};
+
+static const struct gl_video_opts gl_video_opts_def = {
+ .dither_algo = DITHER_FRUIT,
+ .dither_depth = -1,
+ .dither_size = 6,
+ .temporal_dither_period = 1,
+ .fbo_format = "auto",
+ .sigmoid_center = 0.75,
+ .sigmoid_slope = 6.5,
+ .scaler = {
+ {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+ .cutoff = 0.001}, // scale
+ {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}},
+ .cutoff = 0.001}, // dscale
+ {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+ .cutoff = 0.001}, // cscale
+ {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
+ .clamp = 1, }, // tscale
+ },
+ .scaler_resizes_only = 1,
+ .scaler_lut_size = 6,
+ .interpolation_threshold = 0.0001,
+ .alpha_mode = ALPHA_BLEND_TILES,
+ .background = {0, 0, 0, 255},
+ .gamma = 1.0f,
+ .tone_mapping = TONE_MAPPING_MOBIUS,
+ .tone_mapping_param = NAN,
+ .tone_mapping_desat = 2.0,
+ .early_flush = -1,
+};
+
+static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
+ struct bstr name, struct bstr param);
+
+static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
+ struct bstr name, struct bstr param);
+
+#define OPT_BASE_STRUCT struct gl_video_opts
+
+#define SCALER_OPTS(n, i) \
+ OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \
+ OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0), \
+ OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0), \
+ OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \
+ OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \
+ OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \
+ OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0), \
+ OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \
+ OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \
+ OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \
+ OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \
+ OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \
+ OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt)
+
+const struct m_sub_options gl_video_conf = {
+ .opts = (const m_option_t[]) {
+ OPT_CHOICE("gpu-dumb-mode", dumb_mode, 0,
+ ({"auto", 0}, {"yes", 1}, {"no", -1})),
+ OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0),
+ OPT_FLAG("gamma-auto", gamma_auto, 0),
+ OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
+ OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
+ OPT_CHOICE("tone-mapping", tone_mapping, 0,
+ ({"clip", TONE_MAPPING_CLIP},
+ {"mobius", TONE_MAPPING_MOBIUS},
+ {"reinhard", TONE_MAPPING_REINHARD},
+ {"hable", TONE_MAPPING_HABLE},
+ {"gamma", TONE_MAPPING_GAMMA},
+ {"linear", TONE_MAPPING_LINEAR})),
+ OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0),
+ OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
+ OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
+ OPT_FLAG("gamut-warning", gamut_warning, 0),
+ OPT_FLAG("opengl-pbo", pbo, 0),
+ SCALER_OPTS("scale", SCALER_SCALE),
+ SCALER_OPTS("dscale", SCALER_DSCALE),
+ SCALER_OPTS("cscale", SCALER_CSCALE),
+ SCALER_OPTS("tscale", SCALER_TSCALE),
+ OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
+ OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
+ OPT_FLAG("linear-scaling", linear_scaling, 0),
+ OPT_FLAG("correct-downscaling", correct_downscaling, 0),
+ OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
+ OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
+ OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
+ OPT_STRING("fbo-format", fbo_format, 0),
+ OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
+ ({"no", -1}, {"auto", 0})),
+ OPT_CHOICE("dither", dither_algo, 0,
+ ({"fruit", DITHER_FRUIT},
+ {"ordered", DITHER_ORDERED},
+ {"no", DITHER_NONE})),
+ OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
+ OPT_FLAG("temporal-dither", temporal_dither, 0),
+ OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128),
+ OPT_CHOICE("alpha", alpha_mode, 0,
+ ({"no", ALPHA_NO},
+ {"yes", ALPHA_YES},
+ {"blend", ALPHA_BLEND},
+ {"blend-tiles", ALPHA_BLEND_TILES})),
+ OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0),
+ OPT_COLOR("background", background, 0),
+ OPT_FLAG("interpolation", interpolation, 0),
+ OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0),
+ OPT_CHOICE("blend-subtitles", blend_subs, 0,
+ ({"no", BLEND_SUBS_NO},
+ {"yes", BLEND_SUBS_YES},
+ {"video", BLEND_SUBS_VIDEO})),
+ OPT_PATHLIST("glsl-shaders", user_shaders, 0),
+ OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"),
+ OPT_FLAG("deband", deband, 0),
+ OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
+ OPT_FLOAT("sharpen", unsharp, 0),
+ OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096),
+ OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096),
+ OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0),
+ OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, 0),
+ OPT_REPLACED("hdr-tone-mapping", "tone-mapping"),
+ OPT_REPLACED("opengl-shaders", "glsl-shaders"),
+ OPT_CLI_ALIAS("opengl-shader", "glsl-shaders-append"),
+ OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"),
+ OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"),
+ OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"),
+ OPT_REPLACED("opengl-fbo-format", "fbo-format"),
+ OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"),
+ OPT_REPLACED("opengl-gamma", "gpu-gamma"),
+ {0}
+ },
+ .size = sizeof(struct gl_video_opts),
+ .defaults = &gl_video_opts_def,
+};
+
+static void uninit_rendering(struct gl_video *p);
+static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
+static void check_gl_features(struct gl_video *p);
+static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id);
+static const char *handle_scaler_opt(const char *name, bool tscale);
+static void reinit_from_options(struct gl_video *p);
+static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
+static void gl_video_setup_hooks(struct gl_video *p);
+
+#define GLSL(x) gl_sc_add(p->sc, #x "\n");
+#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
+#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
+#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
+
+static struct bstr load_cached_file(struct gl_video *p, const char *path)
+{
+ if (!path || !path[0])
+ return (struct bstr){0};
+ for (int n = 0; n < p->num_files; n++) {
+ if (strcmp(p->files[n].path, path) == 0)
+ return p->files[n].body;
+ }
+ // not found -> load it
+ struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB
+ if (s.len) {
+ struct cached_file new = {
+ .path = talloc_strdup(p, path),
+ .body = s,
+ };
+ MP_TARRAY_APPEND(p, p->files, p->num_files, new);
+ return new.body;
+ }
+ return (struct bstr){0};
+}
+
+static void debug_check_gl(struct gl_video *p, const char *msg)
+{
+ if (p->ra->fns->debug_marker)
+ p->ra->fns->debug_marker(p->ra, msg);
+}
+
+static void gl_video_reset_surfaces(struct gl_video *p)
+{
+ for (int i = 0; i < FBOSURFACES_MAX; i++) {
+ p->surfaces[i].id = 0;
+ p->surfaces[i].pts = MP_NOPTS_VALUE;
+ }
+ p->surface_idx = 0;
+ p->surface_now = 0;
+ p->frames_drawn = 0;
+ p->output_fbo_valid = false;
+}
+
+static void gl_video_reset_hooks(struct gl_video *p)
+{
+ for (int i = 0; i < p->tex_hook_num; i++)
+ talloc_free(p->tex_hooks[i].priv);
+
+ for (int i = 0; i < p->user_tex_num; i++)
+ ra_tex_free(p->ra, &p->user_textures[i].tex);
+
+ p->tex_hook_num = 0;
+ p->user_tex_num = 0;
+}
+
+static inline int fbosurface_wrap(int id)
+{
+ id = id % FBOSURFACES_MAX;
+ return id < 0 ? id + FBOSURFACES_MAX : id;
+}
+
+static void reinit_osd(struct gl_video *p)
+{
+ mpgl_osd_destroy(p->osd);
+ p->osd = NULL;
+ if (p->osd_state)
+ p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
+}
+
+static void uninit_rendering(struct gl_video *p)
+{
+ for (int n = 0; n < SCALER_COUNT; n++)
+ uninit_scaler(p, &p->scaler[n]);
+
+ ra_tex_free(p->ra, &p->dither_texture);
+
+ for (int n = 0; n < 4; n++) {
+ fbotex_uninit(&p->merge_fbo[n]);
+ fbotex_uninit(&p->scale_fbo[n]);
+ fbotex_uninit(&p->integer_fbo[n]);
+ }
+
+ fbotex_uninit(&p->indirect_fbo);
+ fbotex_uninit(&p->blend_subs_fbo);
+ fbotex_uninit(&p->screen_fbo);
+ fbotex_uninit(&p->output_fbo);
+
+ for (int n = 0; n < FBOSURFACES_MAX; n++)
+ fbotex_uninit(&p->surfaces[n].fbotex);
+
+ for (int n = 0; n < SHADER_MAX_SAVED; n++)
+ fbotex_uninit(&p->hook_fbos[n]);
+
+ for (int n = 0; n < 2; n++)
+ fbotex_uninit(&p->vdpau_deinterleave_fbo[n]);
+
+ gl_video_reset_surfaces(p);
+ gl_video_reset_hooks(p);
+
+ gl_sc_reset_error(p->sc);
+}
+
+bool gl_video_gamma_auto_enabled(struct gl_video *p)
+{
+ return p->opts.gamma_auto;
+}
+
+struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
+{
+ return (struct mp_colorspace) {
+ .primaries = p->opts.target_prim,
+ .gamma = p->opts.target_trc,
+ };
+}
+
+// Warning: profile.start must point to a ta allocation, and the function
+// takes over ownership.
+void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
+{
+ if (gl_lcms_set_memory_profile(p->cms, icc_data))
+ reinit_from_options(p);
+}
+
+bool gl_video_icc_auto_enabled(struct gl_video *p)
+{
+ return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
+}
+
+static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
+ enum mp_csp_trc trc)
+{
+ if (!p->use_lut_3d)
+ return false;
+
+ struct AVBufferRef *icc = NULL;
+ if (p->image.mpi)
+ icc = p->image.mpi->icc_profile;
+
+ if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc))
+ return true;
+
+ // GLES3 doesn't provide filtered 16 bit integer textures
+ // GLES2 doesn't even provide 3D textures
+ const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
+ if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) {
+ p->use_lut_3d = false;
+ MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n");
+ return false;
+ }
+
+ struct lut3d *lut3d = NULL;
+ if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) {
+ p->use_lut_3d = false;
+ return false;
+ }
+
+ ra_tex_free(p->ra, &p->lut_3d_texture);
+
+ struct ra_tex_params params = {
+ .dimensions = 3,
+ .w = lut3d->size[0],
+ .h = lut3d->size[1],
+ .d = lut3d->size[2],
+ .format = fmt,
+ .render_src = true,
+ .src_linear = true,
+ .initial_data = lut3d->data,
+ };
+ p->lut_3d_texture = ra_tex_create(p->ra, &params);
+
+ debug_check_gl(p, "after 3d lut creation");
+
+ for (int i = 0; i < 3; i++)
+ p->lut_3d_size[i] = lut3d->size[i];
+
+ talloc_free(lut3d);
+
+ return true;
+}
+
+// Fill an img_tex struct from an FBO + some metadata
+static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type,
+ int components)
+{
+ assert(type != PLANE_NONE);
+ return (struct img_tex){
+ .type = type,
+ .tex = fbo->tex,
+ .multiplier = 1.0,
+ .w = fbo->lw,
+ .h = fbo->lh,
+ .transform = identity_trans,
+ .components = components,
+ };
+}
+
+// Bind an img_tex to a free texture unit and return its ID. At most
+// TEXUNIT_VIDEO_NUM texture units can be bound at once
+static int pass_bind(struct gl_video *p, struct img_tex tex)
+{
+ assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM);
+ p->pass_tex[p->pass_tex_num] = tex;
+ return p->pass_tex_num++;
+}
+
+// Rotation by 90° and flipping.
+// w/h is used for recentering.
+static void get_transform(float w, float h, int rotate, bool flip,
+ struct gl_transform *out_tr)
+{
+ int a = rotate % 90 ? 0 : rotate / 90;
+ int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc.
+ int cos90[4] = {1, 0, -1, 0};
+ struct gl_transform tr = {{{ cos90[a], sin90[a]},
+ {-sin90[a], cos90[a]}}};
+
+ // basically, recenter to keep the whole image in view
+ float b[2] = {1, 1};
+ gl_transform_vec(tr, &b[0], &b[1]);
+ tr.t[0] += b[0] < 0 ? w : 0;
+ tr.t[1] += b[1] < 0 ? h : 0;
+
+ if (flip) {
+ struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}};
+ gl_transform_trans(fliptr, &tr);
+ }
+
+ *out_tr = tr;
+}
+
+// Return the chroma plane upscaled to luma size, but with additional padding
+// for image sizes not aligned to subsampling.
+static int chroma_upsize(int size, int pixel)
+{
+ return (size + pixel - 1) / pixel * pixel;
+}
+
+// If a and b are on the same plane, return what plane type should be used.
+// If a or b are none, the other type always wins.
+// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA
+static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b)
+{
+ if (a == PLANE_NONE)
+ return b;
+ if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ)
+ return b;
+ if (b != PLANE_NONE && a == PLANE_ALPHA)
+ return b;
+ return a;
+}
+
+// Places a video_image's image textures + associated metadata into tex[]. The
+// number of textures is equal to p->plane_count. Any necessary plane offsets
+// are stored in off. (e.g. chroma position)
+static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
+ struct img_tex tex[4], struct gl_transform off[4])
+{
+ assert(vimg->mpi);
+
+ int w = p->image_params.w;
+ int h = p->image_params.h;
+
+ // Determine the chroma offset
+ float ls_w = 1.0 / p->ra_format.chroma_w;
+ float ls_h = 1.0 / p->ra_format.chroma_h;
+
+ struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};
+
+ if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
+ int cx, cy;
+ mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
+ // By default texture coordinates are such that chroma is centered with
+ // any chroma subsampling. If a specific direction is given, make it
+ // so that the luma and chroma sample line up exactly.
+ // For 4:4:4, setting chroma location should have no effect at all.
+ // luma sample size (in chroma coord. space)
+ chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+ chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ }
+
+ int msb_valid_bits =
+ p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
+ // The existing code assumes we just have a single tex multiplier for
+ // all of the planes. This may change in the future
+ float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space,
+ msb_valid_bits,
+ p->ra_format.component_bits);
+
+ memset(tex, 0, 4 * sizeof(tex[0]));
+ for (int n = 0; n < p->plane_count; n++) {
+ struct texplane *t = &vimg->planes[n];
+
+ enum plane_type type = PLANE_NONE;
+ for (int i = 0; i < 4; i++) {
+ int c = p->ra_format.components[n][i];
+ enum plane_type ctype;
+ if (c == 0) {
+ ctype = PLANE_NONE;
+ } else if (c == 4) {
+ ctype = PLANE_ALPHA;
+ } else if (p->image_params.color.space == MP_CSP_RGB) {
+ ctype = PLANE_RGB;
+ } else if (p->image_params.color.space == MP_CSP_XYZ) {
+ ctype = PLANE_XYZ;
+ } else {
+ ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
+ }
+ type = merge_plane_types(type, ctype);
+ }
+
+ tex[n] = (struct img_tex){
+ .type = type,
+ .tex = t->tex,
+ .multiplier = tex_mul,
+ .w = t->w,
+ .h = t->h,
+ };
+
+ for (int i = 0; i < 4; i++)
+ tex[n].components += !!p->ra_format.components[n][i];
+
+ get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
+ &tex[n].transform);
+ if (p->image_params.rotate % 180 == 90)
+ MPSWAP(int, tex[n].w, tex[n].h);
+
+ off[n] = identity_trans;
+
+ if (type == PLANE_CHROMA) {
+ struct gl_transform rot;
+ get_transform(0, 0, p->image_params.rotate, true, &rot);
+
+ struct gl_transform tr = chroma;
+ gl_transform_vec(rot, &tr.t[0], &tr.t[1]);
+
+ float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
+ float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;
+
+ // Adjust the chroma offset if the real chroma size is fractional
+ // due image sizes not aligned to chroma subsampling.
+ struct gl_transform rot2;
+ get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2);
+ if (rot2.m[0][0] < 0)
+ tr.t[0] += dx;
+ if (rot2.m[1][0] < 0)
+ tr.t[0] += dy;
+ if (rot2.m[0][1] < 0)
+ tr.t[1] += dx;
+ if (rot2.m[1][1] < 0)
+ tr.t[1] += dy;
+
+ off[n] = tr;
+ }
+ }
+}
+
+// Return the index of the given component (assuming all non-padding components
+// of all planes are concatenated into a linear list).
+static int find_comp(struct ra_imgfmt_desc *desc, int component)
+{
+ int cur = 0;
+ for (int n = 0; n < desc->num_planes; n++) {
+ for (int i = 0; i < 4; i++) {
+ if (desc->components[n][i]) {
+ if (desc->components[n][i] == component)
+ return cur;
+ cur++;
+ }
+ }
+ }
+ return -1;
+}
+
+static void init_video(struct gl_video *p)
+{
+ p->use_integer_conversion = false;
+
+ if (p->hwdec && ra_hwdec_test_format(p->hwdec, p->image_params.imgfmt)) {
+ if (p->hwdec->driver->overlay_frame) {
+ MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
+ "on the video!\n");
+ } else {
+ p->hwdec_mapper = ra_hwdec_mapper_create(p->hwdec, &p->image_params);
+ if (!p->hwdec_mapper)
+ MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
+ }
+ if (p->hwdec_mapper)
+ p->image_params = p->hwdec_mapper->dst_params;
+ const char **exts = p->hwdec->glsl_extensions;
+ for (int n = 0; exts && exts[n]; n++)
+ gl_sc_enable_extension(p->sc, (char *)exts[n]);
+ p->hwdec_active = true;
+ }
+
+ p->ra_format = (struct ra_imgfmt_desc){0};
+ ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format);
+
+ p->plane_count = p->ra_format.num_planes;
+
+ p->has_alpha = false;
+ p->is_gray = true;
+
+ for (int n = 0; n < p->ra_format.num_planes; n++) {
+ for (int i = 0; i < 4; i++) {
+ if (p->ra_format.components[n][i]) {
+ p->has_alpha |= p->ra_format.components[n][i] == 4;
+ p->is_gray &= p->ra_format.components[n][i] == 1 ||
+ p->ra_format.components[n][i] == 4;
+ }
+ }
+ }
+
+ for (int c = 0; c < 4; c++) {
+ int loc = find_comp(&p->ra_format, c + 1);
+ p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0];
+ }
+ p->color_swizzle[4] = '\0';
+
+ // Format-dependent checks.
+ check_gl_features(p);
+
+ mp_image_params_guess_csp(&p->image_params);
+
+ av_lfg_init(&p->lfg, 1);
+
+ debug_check_gl(p, "before video texture creation");
+
+ if (!p->hwdec_active) {
+ struct video_image *vimg = &p->image;
+
+ struct mp_image layout = {0};
+ mp_image_set_params(&layout, &p->image_params);
+
+ for (int n = 0; n < p->plane_count; n++) {
+ struct texplane *plane = &vimg->planes[n];
+ const struct ra_format *format = p->ra_format.planes[n];
+
+ plane->w = mp_image_plane_w(&layout, n);
+ plane->h = mp_image_plane_h(&layout, n);
+
+ struct ra_tex_params params = {
+ .dimensions = 2,
+ .w = plane->w + p->opts.tex_pad_x,
+ .h = plane->h + p->opts.tex_pad_y,
+ .d = 1,
+ .format = format,
+ .render_src = true,
+ .src_linear = format->linear_filter,
+ .non_normalized = p->opts.use_rectangle,
+ .host_mutable = true,
+ };
+
+ MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
+ params.w, params.h);
+
+ plane->tex = ra_tex_create(p->ra, &params);
+ if (!plane->tex)
+ abort(); // shit happens
+
+ p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
+ }
+ }
+
+ debug_check_gl(p, "after video texture creation");
+
+ gl_video_setup_hooks(p);
+}
+
+// Release any texture mappings associated with the current frame.
+static void unmap_current_image(struct gl_video *p)
+{
+ struct video_image *vimg = &p->image;
+
+ if (vimg->hwdec_mapped) {
+ assert(p->hwdec_active && p->hwdec_mapper);
+ ra_hwdec_mapper_unmap(p->hwdec_mapper);
+ memset(vimg->planes, 0, sizeof(vimg->planes));
+ vimg->hwdec_mapped = false;
+ vimg->id = 0; // needs to be mapped again
+ }
+}
+
+static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
+{
+ for (int i = 0; i < p->num_dr_buffers; i++) {
+ struct dr_buffer *buffer = &p->dr_buffers[i];
+ uint8_t *bufptr = buffer->buf->data;
+ size_t size = buffer->buf->params.size;
+ if (ptr >= bufptr && ptr < bufptr + size)
+ return buffer;
+ }
+
+ return NULL;
+}
+
+static void gc_pending_dr_fences(struct gl_video *p, bool force)
+{
+again:;
+ for (int n = 0; n < p->num_dr_buffers; n++) {
+ struct dr_buffer *buffer = &p->dr_buffers[n];
+ if (!buffer->mpi)
+ continue;
+
+ bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
+ if (res || force) {
+ // Unreferencing the image could cause gl_video_dr_free_buffer()
+ // to be called by the talloc destructor (if it was the last
+ // reference). This will implicitly invalidate the buffer pointer
+ // and change the p->dr_buffers array. To make it worse, it could
+ // free multiple dr_buffers due to weird theoretical corner cases.
+ // This is also why we use the goto to iterate again from the
+ // start, because everything gets fucked up. Hail satan!
+ struct mp_image *ref = buffer->mpi;
+ buffer->mpi = NULL;
+ talloc_free(ref);
+ goto again;
+ }
+ }
+}
+
+static void unref_current_image(struct gl_video *p)
+{
+ unmap_current_image(p);
+ p->image.id = 0;
+
+ mp_image_unrefp(&p->image.mpi);
+
+ // While we're at it, also garbage collect pending fences in here to
+ // get it out of the way.
+ gc_pending_dr_fences(p, false);
+}
+
+// If overlay mode is used, make sure to remove the overlay.
+// Be careful with this. Removing the overlay and adding another one will
+// lead to flickering artifacts.
+static void unmap_overlay(struct gl_video *p)
+{
+ if (p->hwdec_active && p->hwdec->driver->overlay_frame)
+ p->hwdec->driver->overlay_frame(p->hwdec, NULL, NULL, NULL, true);
+}
+
+static void uninit_video(struct gl_video *p)
+{
+ uninit_rendering(p);
+
+ struct video_image *vimg = &p->image;
+
+ unmap_overlay(p);
+ unref_current_image(p);
+
+ for (int n = 0; n < p->plane_count; n++) {
+ struct texplane *plane = &vimg->planes[n];
+ ra_tex_free(p->ra, &plane->tex);
+ }
+ *vimg = (struct video_image){0};
+
+ // Invalidate image_params to ensure that gl_video_config() will call
+ // init_video() on uninitialized gl_video.
+ p->real_image_params = (struct mp_image_params){0};
+ p->image_params = p->real_image_params;
+ p->hwdec_active = false;
+ ra_hwdec_mapper_free(&p->hwdec_mapper);
+}
+
+static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
+{
+ if (!p->pass || p->pass_idx == PASS_INFO_MAX)
+ return;
+
+ struct pass_info *pass = &p->pass[p->pass_idx];
+ pass->perf = perf;
+
+ if (pass->desc.len == 0)
+ bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
+
+ p->pass_idx++;
+}
+
+PRINTF_ATTRIBUTE(2, 3)
+static void pass_describe(struct gl_video *p, const char *textf, ...)
+{
+ if (!p->pass || p->pass_idx == PASS_INFO_MAX)
+ return;
+
+ struct pass_info *pass = &p->pass[p->pass_idx];
+
+