diff options
Diffstat (limited to 'video/out/gpu/video.c')
-rw-r--r-- | video/out/gpu/video.c | 3809 |
1 files changed, 3809 insertions, 0 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c new file mode 100644 index 0000000000..e36fde60e8 --- /dev/null +++ b/video/out/gpu/video.c @@ -0,0 +1,3809 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <math.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <assert.h> + +#include <libavutil/common.h> +#include <libavutil/lfg.h> + +#include "video.h" + +#include "misc/bstr.h" +#include "options/m_config.h" +#include "common/global.h" +#include "options/options.h" +#include "utils.h" +#include "hwdec.h" +#include "osd.h" +#include "ra.h" +#include "stream/stream.h" +#include "video_shaders.h" +#include "user_shaders.h" +#include "video/out/filter_kernels.h" +#include "video/out/aspect.h" +#include "video/out/dither.h" +#include "video/out/vo.h" + +// scale/cscale arguments that map directly to shader filter routines. +// Note that the convolution filters are not included in this list. +static const char *const fixed_scale_filters[] = { + "bilinear", + "bicubic_fast", + "oversample", + NULL +}; +static const char *const fixed_tscale_filters[] = { + "oversample", + "linear", + NULL +}; + +// must be sorted, and terminated with 0 +int filter_sizes[] = + {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; +int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM + +struct vertex_pt { + float x, y; +}; + +struct vertex { + struct vertex_pt position; + struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM]; +}; + +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])}, + {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])}, + {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])}, + {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[3])}, + {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])}, + {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])}, + {0} +}; + +struct texplane { + struct ra_tex *tex; + int w, h; + bool flipped; +}; + +struct video_image { + struct texplane planes[4]; + struct mp_image *mpi; // original input image + uint64_t id; // unique ID identifying mpi contents + bool hwdec_mapped; +}; + +enum plane_type { + PLANE_NONE = 0, + PLANE_RGB, + PLANE_LUMA, + PLANE_CHROMA, + PLANE_ALPHA, + PLANE_XYZ, +}; + +static const char *plane_names[] = { + [PLANE_NONE] = "unknown", + [PLANE_RGB] = "rgb", + [PLANE_LUMA] = "luma", + [PLANE_CHROMA] = "chroma", + [PLANE_ALPHA] = "alpha", + [PLANE_XYZ] = "xyz", +}; + +// A self-contained description of a source image which can be bound to a +// texture unit and sampled from. Contains metadata about how it's to be used +struct img_tex { + enum plane_type type; // must be set to something non-zero + int components; // number of relevant coordinates + float multiplier; // multiplier to be used when sampling + struct ra_tex *tex; + int w, h; // logical size (after transformation) + struct gl_transform transform; // rendering transformation +}; + +// A named img_tex, for user scripting purposes +struct saved_tex { + const char *name; + struct img_tex tex; +}; + +// A texture hook. This is some operation that transforms a named texture as +// soon as it's generated +struct tex_hook { + const char *save_tex; + const char *hook_tex[SHADER_MAX_HOOKS]; + const char *bind_tex[TEXUNIT_VIDEO_NUM]; + int components; // how many components are relevant (0 = same as input) + void *priv; // this gets talloc_freed when the tex_hook is removed + void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL + struct gl_transform *trans, void *priv); + bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv); +}; + +struct fbosurface { + struct fbotex fbotex; + uint64_t id; + double pts; +}; + +#define FBOSURFACES_MAX 10 + +struct cached_file { + char *path; + struct bstr body; +}; + +struct pass_info { + struct bstr desc; + struct mp_pass_perf perf; +}; + +#define PASS_INFO_MAX (SHADER_MAX_PASSES + 32) + +struct dr_buffer { + struct ra_buf *buf; + // The mpi reference will keep the data from being recycled (or from other + // references gaining write access) while the GPU is accessing the buffer. + struct mp_image *mpi; +}; + +struct gl_video { + struct ra *ra; + + struct mpv_global *global; + struct mp_log *log; + struct gl_video_opts opts; + struct m_config_cache *opts_cache; + struct gl_lcms *cms; + + int fb_depth; // actual bits available in GL main framebuffer + struct m_color clear_color; + bool force_clear_color; + + struct gl_shader_cache *sc; + + struct osd_state *osd_state; + struct mpgl_osd *osd; + double osd_pts; + + struct ra_tex *lut_3d_texture; + bool use_lut_3d; + int lut_3d_size[3]; + + struct ra_tex *dither_texture; + + struct mp_image_params real_image_params; // configured format + struct mp_image_params image_params; // texture format (mind hwdec case) + struct ra_imgfmt_desc ra_format; // texture format + int plane_count; + + bool is_gray; + bool has_alpha; + char color_swizzle[5]; + bool use_integer_conversion; + + struct video_image image; + + struct dr_buffer *dr_buffers; + int num_dr_buffers; + + bool using_dr_path; + + bool dumb_mode; + bool forced_dumb_mode; + + const struct ra_format *fbo_format; + struct fbotex merge_fbo[4]; + struct fbotex scale_fbo[4]; + struct fbotex integer_fbo[4]; + struct fbotex indirect_fbo; + struct fbotex blend_subs_fbo; + struct fbotex screen_fbo; + struct fbotex output_fbo; + struct fbosurface surfaces[FBOSURFACES_MAX]; + struct fbotex vdpau_deinterleave_fbo[2]; + struct ra_buf *hdr_peak_ssbo; + + // user pass descriptions and textures + struct tex_hook tex_hooks[SHADER_MAX_PASSES]; + int tex_hook_num; + struct gl_user_shader_tex user_textures[SHADER_MAX_PASSES]; + int user_tex_num; + + int surface_idx; + int surface_now; + int frames_drawn; + bool is_interpolated; + bool output_fbo_valid; + + // state for configured scalers + struct scaler scaler[SCALER_COUNT]; + + struct mp_csp_equalizer_state *video_eq; + + struct mp_rect src_rect; // displayed part of the source video + struct mp_rect dst_rect; // video rectangle on output window + struct mp_osd_res osd_rect; // OSD size/margins + + // temporary during rendering + struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; + struct compute_info pass_compute; // compute shader metadata for this pass + int pass_tex_num; + int texture_w, texture_h; + struct gl_transform texture_offset; // texture transform without rotation + int components; + bool use_linear; + float user_gamma; + + // pass info / metrics + struct pass_info pass_fresh[PASS_INFO_MAX]; + struct pass_info pass_redraw[PASS_INFO_MAX]; + struct pass_info *pass; + int pass_idx; + struct timer_pool *upload_timer; + struct timer_pool *blit_timer; + struct timer_pool *osd_timer; + + // intermediate textures + struct saved_tex saved_tex[SHADER_MAX_SAVED]; + int saved_tex_num; + struct fbotex hook_fbos[SHADER_MAX_SAVED]; + int hook_fbo_num; + + int frames_uploaded; + int frames_rendered; + AVLFG lfg; + + // Cached because computing it can take relatively long + int last_dither_matrix_size; + float *last_dither_matrix; + + struct cached_file *files; + int num_files; + + struct ra_hwdec *hwdec; + struct ra_hwdec_mapper *hwdec_mapper; + bool hwdec_active; + + bool dsi_warned; + bool broken_frame; // temporary error state +}; + +static const struct gl_video_opts gl_video_opts_def = { + .dither_algo = DITHER_FRUIT, + .dither_depth = -1, + .dither_size = 6, + .temporal_dither_period = 1, + .fbo_format = "auto", + .sigmoid_center = 0.75, + .sigmoid_slope = 6.5, + .scaler = { + {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // scale + {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // dscale + {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // cscale + {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .clamp = 1, }, // tscale + }, + .scaler_resizes_only = 1, + .scaler_lut_size = 6, + .interpolation_threshold = 0.0001, + .alpha_mode = ALPHA_BLEND_TILES, + .background = {0, 0, 0, 255}, + .gamma = 1.0f, + .tone_mapping = TONE_MAPPING_MOBIUS, + .tone_mapping_param = NAN, + .tone_mapping_desat = 2.0, + .early_flush = -1, +}; + +static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +static int validate_window_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +#define OPT_BASE_STRUCT struct gl_video_opts + +#define SCALER_OPTS(n, i) \ + OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \ + OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0), \ + OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0), \ + OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \ + OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \ + OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0), \ + OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \ + OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \ + OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \ + OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt) + +const struct m_sub_options gl_video_conf = { + .opts = (const m_option_t[]) { + OPT_CHOICE("gpu-dumb-mode", dumb_mode, 0, + ({"auto", 0}, {"yes", 1}, {"no", -1})), + OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0), + OPT_FLAG("gamma-auto", gamma_auto, 0), + OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), + OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), + OPT_CHOICE("tone-mapping", tone_mapping, 0, + ({"clip", TONE_MAPPING_CLIP}, + {"mobius", TONE_MAPPING_MOBIUS}, + {"reinhard", TONE_MAPPING_REINHARD}, + {"hable", TONE_MAPPING_HABLE}, + {"gamma", TONE_MAPPING_GAMMA}, + {"linear", TONE_MAPPING_LINEAR})), + OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), + OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), + OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), + OPT_FLAG("gamut-warning", gamut_warning, 0), + OPT_FLAG("opengl-pbo", pbo, 0), + SCALER_OPTS("scale", SCALER_SCALE), + SCALER_OPTS("dscale", SCALER_DSCALE), + SCALER_OPTS("cscale", SCALER_CSCALE), + SCALER_OPTS("tscale", SCALER_TSCALE), + OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10), + OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), + OPT_FLAG("linear-scaling", linear_scaling, 0), + OPT_FLAG("correct-downscaling", correct_downscaling, 0), + OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), + OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), + OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), + OPT_STRING("fbo-format", fbo_format, 0), + OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, + ({"no", -1}, {"auto", 0})), + OPT_CHOICE("dither", dither_algo, 0, + ({"fruit", DITHER_FRUIT}, + {"ordered", DITHER_ORDERED}, + {"no", DITHER_NONE})), + OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8), + OPT_FLAG("temporal-dither", temporal_dither, 0), + OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128), + OPT_CHOICE("alpha", alpha_mode, 0, + ({"no", ALPHA_NO}, + {"yes", ALPHA_YES}, + {"blend", ALPHA_BLEND}, + {"blend-tiles", ALPHA_BLEND_TILES})), + OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0), + OPT_COLOR("background", background, 0), + OPT_FLAG("interpolation", interpolation, 0), + OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0), + OPT_CHOICE("blend-subtitles", blend_subs, 0, + ({"no", BLEND_SUBS_NO}, + {"yes", BLEND_SUBS_YES}, + {"video", BLEND_SUBS_VIDEO})), + OPT_PATHLIST("glsl-shaders", user_shaders, 0), + OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"), + OPT_FLAG("deband", deband, 0), + OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), + OPT_FLOAT("sharpen", unsharp, 0), + OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096), + OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096), + OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), + OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, 0), + OPT_REPLACED("hdr-tone-mapping", "tone-mapping"), + OPT_REPLACED("opengl-shaders", "glsl-shaders"), + OPT_CLI_ALIAS("opengl-shader", "glsl-shaders-append"), + OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"), + OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"), + OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"), + OPT_REPLACED("opengl-fbo-format", "fbo-format"), + OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"), + OPT_REPLACED("opengl-gamma", "gpu-gamma"), + {0} + }, + .size = sizeof(struct gl_video_opts), + .defaults = &gl_video_opts_def, +}; + +static void uninit_rendering(struct gl_video *p); +static void uninit_scaler(struct gl_video *p, struct scaler *scaler); +static void check_gl_features(struct gl_video *p); +static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id); +static const char *handle_scaler_opt(const char *name, bool tscale); +static void reinit_from_options(struct gl_video *p); +static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); +static void gl_video_setup_hooks(struct gl_video *p); + +#define GLSL(x) gl_sc_add(p->sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) +#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) +#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__) + +static struct bstr load_cached_file(struct gl_video *p, const char *path) +{ + if (!path || !path[0]) + return (struct bstr){0}; + for (int n = 0; n < p->num_files; n++) { + if (strcmp(p->files[n].path, path) == 0) + return p->files[n].body; + } + // not found -> load it + struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB + if (s.len) { + struct cached_file new = { + .path = talloc_strdup(p, path), + .body = s, + }; + MP_TARRAY_APPEND(p, p->files, p->num_files, new); + return new.body; + } + return (struct bstr){0}; +} + +static void debug_check_gl(struct gl_video *p, const char *msg) +{ + if (p->ra->fns->debug_marker) + p->ra->fns->debug_marker(p->ra, msg); +} + +static void gl_video_reset_surfaces(struct gl_video *p) +{ + for (int i = 0; i < FBOSURFACES_MAX; i++) { + p->surfaces[i].id = 0; + p->surfaces[i].pts = MP_NOPTS_VALUE; + } + p->surface_idx = 0; + p->surface_now = 0; + p->frames_drawn = 0; + p->output_fbo_valid = false; +} + +static void gl_video_reset_hooks(struct gl_video *p) +{ + for (int i = 0; i < p->tex_hook_num; i++) + talloc_free(p->tex_hooks[i].priv); + + for (int i = 0; i < p->user_tex_num; i++) + ra_tex_free(p->ra, &p->user_textures[i].tex); + + p->tex_hook_num = 0; + p->user_tex_num = 0; +} + +static inline int fbosurface_wrap(int id) +{ + id = id % FBOSURFACES_MAX; + return id < 0 ? id + FBOSURFACES_MAX : id; +} + +static void reinit_osd(struct gl_video *p) +{ + mpgl_osd_destroy(p->osd); + p->osd = NULL; + if (p->osd_state) + p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state); +} + +static void uninit_rendering(struct gl_video *p) +{ + for (int n = 0; n < SCALER_COUNT; n++) + uninit_scaler(p, &p->scaler[n]); + + ra_tex_free(p->ra, &p->dither_texture); + + for (int n = 0; n < 4; n++) { + fbotex_uninit(&p->merge_fbo[n]); + fbotex_uninit(&p->scale_fbo[n]); + fbotex_uninit(&p->integer_fbo[n]); + } + + fbotex_uninit(&p->indirect_fbo); + fbotex_uninit(&p->blend_subs_fbo); + fbotex_uninit(&p->screen_fbo); + fbotex_uninit(&p->output_fbo); + + for (int n = 0; n < FBOSURFACES_MAX; n++) + fbotex_uninit(&p->surfaces[n].fbotex); + + for (int n = 0; n < SHADER_MAX_SAVED; n++) + fbotex_uninit(&p->hook_fbos[n]); + + for (int n = 0; n < 2; n++) + fbotex_uninit(&p->vdpau_deinterleave_fbo[n]); + + gl_video_reset_surfaces(p); + gl_video_reset_hooks(p); + + gl_sc_reset_error(p->sc); +} + +bool gl_video_gamma_auto_enabled(struct gl_video *p) +{ + return p->opts.gamma_auto; +} + +struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) +{ + return (struct mp_colorspace) { + .primaries = p->opts.target_prim, + .gamma = p->opts.target_trc, + }; +} + +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) +{ + if (gl_lcms_set_memory_profile(p->cms, icc_data)) + reinit_from_options(p); +} + +bool gl_video_icc_auto_enabled(struct gl_video *p) +{ + return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; +} + +static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, + enum mp_csp_trc trc) +{ + if (!p->use_lut_3d) + return false; + + struct AVBufferRef *icc = NULL; + if (p->image.mpi) + icc = p->image.mpi->icc_profile; + + if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc)) + return true; + + // GLES3 doesn't provide filtered 16 bit integer textures + // GLES2 doesn't even provide 3D textures + const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); + if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) { + p->use_lut_3d = false; + MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n"); + return false; + } + + struct lut3d *lut3d = NULL; + if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) { + p->use_lut_3d = false; + return false; + } + + ra_tex_free(p->ra, &p->lut_3d_texture); + + struct ra_tex_params params = { + .dimensions = 3, + .w = lut3d->size[0], + .h = lut3d->size[1], + .d = lut3d->size[2], + .format = fmt, + .render_src = true, + .src_linear = true, + .initial_data = lut3d->data, + }; + p->lut_3d_texture = ra_tex_create(p->ra, ¶ms); + + debug_check_gl(p, "after 3d lut creation"); + + for (int i = 0; i < 3; i++) + p->lut_3d_size[i] = lut3d->size[i]; + + talloc_free(lut3d); + + return true; +} + +// Fill an img_tex struct from an FBO + some metadata +static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type, + int components) +{ + assert(type != PLANE_NONE); + return (struct img_tex){ + .type = type, + .tex = fbo->tex, + .multiplier = 1.0, + .w = fbo->lw, + .h = fbo->lh, + .transform = identity_trans, + .components = components, + }; +} + +// Bind an img_tex to a free texture unit and return its ID. At most +// TEXUNIT_VIDEO_NUM texture units can be bound at once +static int pass_bind(struct gl_video *p, struct img_tex tex) +{ + assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM); + p->pass_tex[p->pass_tex_num] = tex; + return p->pass_tex_num++; +} + +// Rotation by 90° and flipping. +// w/h is used for recentering. +static void get_transform(float w, float h, int rotate, bool flip, + struct gl_transform *out_tr) +{ + int a = rotate % 90 ? 0 : rotate / 90; + int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. + int cos90[4] = {1, 0, -1, 0}; + struct gl_transform tr = {{{ cos90[a], sin90[a]}, + {-sin90[a], cos90[a]}}}; + + // basically, recenter to keep the whole image in view + float b[2] = {1, 1}; + gl_transform_vec(tr, &b[0], &b[1]); + tr.t[0] += b[0] < 0 ? w : 0; + tr.t[1] += b[1] < 0 ? h : 0; + + if (flip) { + struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}}; + gl_transform_trans(fliptr, &tr); + } + + *out_tr = tr; +} + +// Return the chroma plane upscaled to luma size, but with additional padding +// for image sizes not aligned to subsampling. +static int chroma_upsize(int size, int pixel) +{ + return (size + pixel - 1) / pixel * pixel; +} + +// If a and b are on the same plane, return what plane type should be used. +// If a or b are none, the other type always wins. +// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA +static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) +{ + if (a == PLANE_NONE) + return b; + if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ) + return b; + if (b != PLANE_NONE && a == PLANE_ALPHA) + return b; + return a; +} + +// Places a video_image's image textures + associated metadata into tex[]. The +// number of textures is equal to p->plane_count. Any necessary plane offsets +// are stored in off. (e.g. chroma position) +static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, + struct img_tex tex[4], struct gl_transform off[4]) +{ + assert(vimg->mpi); + + int w = p->image_params.w; + int h = p->image_params.h; + + // Determine the chroma offset + float ls_w = 1.0 / p->ra_format.chroma_w; + float ls_h = 1.0 / p->ra_format.chroma_h; + + struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; + + if (p->image_params.chroma_location != MP_CHROMA_CENTER) { + int cx, cy; + mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); + // By default texture coordinates are such that chroma is centered with + // any chroma subsampling. If a specific direction is given, make it + // so that the luma and chroma sample line up exactly. + // For 4:4:4, setting chroma location should have no effect at all. + // luma sample size (in chroma coord. space) + chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; + chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + } + + int msb_valid_bits = + p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); + // The existing code assumes we just have a single tex multiplier for + // all of the planes. This may change in the future + float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space, + msb_valid_bits, + p->ra_format.component_bits); + + memset(tex, 0, 4 * sizeof(tex[0])); + for (int n = 0; n < p->plane_count; n++) { + struct texplane *t = &vimg->planes[n]; + + enum plane_type type = PLANE_NONE; + for (int i = 0; i < 4; i++) { + int c = p->ra_format.components[n][i]; + enum plane_type ctype; + if (c == 0) { + ctype = PLANE_NONE; + } else if (c == 4) { + ctype = PLANE_ALPHA; + } else if (p->image_params.color.space == MP_CSP_RGB) { + ctype = PLANE_RGB; + } else if (p->image_params.color.space == MP_CSP_XYZ) { + ctype = PLANE_XYZ; + } else { + ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA; + } + type = merge_plane_types(type, ctype); + } + + tex[n] = (struct img_tex){ + .type = type, + .tex = t->tex, + .multiplier = tex_mul, + .w = t->w, + .h = t->h, + }; + + for (int i = 0; i < 4; i++) + tex[n].components += !!p->ra_format.components[n][i]; + + get_transform(t->w, t->h, p->image_params.rotate, t->flipped, + &tex[n].transform); + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, tex[n].w, tex[n].h); + + off[n] = identity_trans; + + if (type == PLANE_CHROMA) { + struct gl_transform rot; + get_transform(0, 0, p->image_params.rotate, true, &rot); + + struct gl_transform tr = chroma; + gl_transform_vec(rot, &tr.t[0], &tr.t[1]); + + float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w; + float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h; + + // Adjust the chroma offset if the real chroma size is fractional + // due image sizes not aligned to chroma subsampling. + struct gl_transform rot2; + get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2); + if (rot2.m[0][0] < 0) + tr.t[0] += dx; + if (rot2.m[1][0] < 0) + tr.t[0] += dy; + if (rot2.m[0][1] < 0) + tr.t[1] += dx; + if (rot2.m[1][1] < 0) + tr.t[1] += dy; + + off[n] = tr; + } + } +} + +// Return the index of the given component (assuming all non-padding components +// of all planes are concatenated into a linear list). +static int find_comp(struct ra_imgfmt_desc *desc, int component) +{ + int cur = 0; + for (int n = 0; n < desc->num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (desc->components[n][i]) { + if (desc->components[n][i] == component) + return cur; + cur++; + } + } + } + return -1; +} + +static void init_video(struct gl_video *p) +{ + p->use_integer_conversion = false; + + if (p->hwdec && ra_hwdec_test_format(p->hwdec, p->image_params.imgfmt)) { + if (p->hwdec->driver->overlay_frame) { + MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed " + "on the video!\n"); + } else { + p->hwdec_mapper = ra_hwdec_mapper_create(p->hwdec, &p->image_params); + if (!p->hwdec_mapper) + MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); + } + if (p->hwdec_mapper) + p->image_params = p->hwdec_mapper->dst_params; + const char **exts = p->hwdec->glsl_extensions; + for (int n = 0; exts && exts[n]; n++) + gl_sc_enable_extension(p->sc, (char *)exts[n]); + p->hwdec_active = true; + } + + p->ra_format = (struct ra_imgfmt_desc){0}; + ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format); + + p->plane_count = p->ra_format.num_planes; + + p->has_alpha = false; + p->is_gray = true; + + for (int n = 0; n < p->ra_format.num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (p->ra_format.components[n][i]) { + p->has_alpha |= p->ra_format.components[n][i] == 4; + p->is_gray &= p->ra_format.components[n][i] == 1 || + p->ra_format.components[n][i] == 4; + } + } + } + + for (int c = 0; c < 4; c++) { + int loc = find_comp(&p->ra_format, c + 1); + p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0]; + } + p->color_swizzle[4] = '\0'; + + // Format-dependent checks. + check_gl_features(p); + + mp_image_params_guess_csp(&p->image_params); + + av_lfg_init(&p->lfg, 1); + + debug_check_gl(p, "before video texture creation"); + + if (!p->hwdec_active) { + struct video_image *vimg = &p->image; + + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + const struct ra_format *format = p->ra_format.planes[n]; + + plane->w = mp_image_plane_w(&layout, n); + plane->h = mp_image_plane_h(&layout, n); + + struct ra_tex_params params = { + .dimensions = 2, + .w = plane->w + p->opts.tex_pad_x, + .h = plane->h + p->opts.tex_pad_y, + .d = 1, + .format = format, + .render_src = true, + .src_linear = format->linear_filter, + .non_normalized = p->opts.use_rectangle, + .host_mutable = true, + }; + + MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, + params.w, params.h); + + plane->tex = ra_tex_create(p->ra, ¶ms); + if (!plane->tex) + abort(); // shit happens + + p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; + } + } + + debug_check_gl(p, "after video texture creation"); + + gl_video_setup_hooks(p); +} + +// Release any texture mappings associated with the current frame. +static void unmap_current_image(struct gl_video *p) +{ + struct video_image *vimg = &p->image; + + if (vimg->hwdec_mapped) { + assert(p->hwdec_active && p->hwdec_mapper); + ra_hwdec_mapper_unmap(p->hwdec_mapper); + memset(vimg->planes, 0, sizeof(vimg->planes)); + vimg->hwdec_mapped = false; + vimg->id = 0; // needs to be mapped again + } +} + +static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) +{ + for (int i = 0; i < p->num_dr_buffers; i++) { + struct dr_buffer *buffer = &p->dr_buffers[i]; + uint8_t *bufptr = buffer->buf->data; + size_t size = buffer->buf->params.size; + if (ptr >= bufptr && ptr < bufptr + size) + return buffer; + } + + return NULL; +} + +static void gc_pending_dr_fences(struct gl_video *p, bool force) +{ +again:; + for (int n = 0; n < p->num_dr_buffers; n++) { + struct dr_buffer *buffer = &p->dr_buffers[n]; + if (!buffer->mpi) + continue; + + bool res = p->ra->fns->buf_poll(p->ra, buffer->buf); + if (res || force) { + // Unreferencing the image could cause gl_video_dr_free_buffer() + // to be called by the talloc destructor (if it was the last + // reference). This will implicitly invalidate the buffer pointer + // and change the p->dr_buffers array. To make it worse, it could + // free multiple dr_buffers due to weird theoretical corner cases. + // This is also why we use the goto to iterate again from the + // start, because everything gets fucked up. Hail satan! + struct mp_image *ref = buffer->mpi; + buffer->mpi = NULL; + talloc_free(ref); + goto again; + } + } +} + +static void unref_current_image(struct gl_video *p) +{ + unmap_current_image(p); + p->image.id = 0; + + mp_image_unrefp(&p->image.mpi); + + // While we're at it, also garbage collect pending fences in here to + // get it out of the way. + gc_pending_dr_fences(p, false); +} + +// If overlay mode is used, make sure to remove the overlay. +// Be careful with this. Removing the overlay and adding another one will +// lead to flickering artifacts. +static void unmap_overlay(struct gl_video *p) +{ + if (p->hwdec_active && p->hwdec->driver->overlay_frame) + p->hwdec->driver->overlay_frame(p->hwdec, NULL, NULL, NULL, true); +} + +static void uninit_video(struct gl_video *p) +{ + uninit_rendering(p); + + struct video_image *vimg = &p->image; + + unmap_overlay(p); + unref_current_image(p); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + ra_tex_free(p->ra, &plane->tex); + } + *vimg = (struct video_image){0}; + + // Invalidate image_params to ensure that gl_video_config() will call + // init_video() on uninitialized gl_video. + p->real_image_params = (struct mp_image_params){0}; + p->image_params = p->real_image_params; + p->hwdec_active = false; + ra_hwdec_mapper_free(&p->hwdec_mapper); +} + +static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +{ + if (!p->pass || p->pass_idx == PASS_INFO_MAX) + return; + + struct pass_info *pass = &p->pass[p->pass_idx]; + pass->perf = perf; + + if (pass->desc.len == 0) + bstr_xappend(p, &pass->desc, bstr0("(unknown)")); + + p->pass_idx++; +} + +PRINTF_A |