summary refs log tree commit diff stats
path: root/video/out/opengl/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- video/out/opengl/video.c 2930
1 files changed, 2930 insertions, 0 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
new file mode 100644
index 0000000000..c0c802c580
--- /dev/null
+++ b/video/out/opengl/video.c
@@ -0,0 +1,2930 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can alternatively redistribute this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include <libavutil/common.h>
+#include <libavutil/lfg.h>
+
+#include "video.h"
+
+#include "misc/bstr.h"
+#include "common.h"
+#include "utils.h"
+#include "hwdec.h"
+#include "osd.h"
+#include "video/out/filter_kernels.h"
+#include "video/out/aspect.h"
+#include "video/out/bitmap_packer.h"
+#include "video/out/dither.h"
+#include "video/out/vo.h"
+
+// Pixel width of 1D lookup textures.
+#define LOOKUP_TEXTURE_SIZE 256
+
+// Texture units 0-5 are used by the video, and for free use by the passes
+#define TEXUNIT_VIDEO_NUM 6
+
+// Other texture units are reserved for specific purposes
+#define TEXUNIT_SCALERS TEXUNIT_VIDEO_NUM
+#define TEXUNIT_3DLUT (TEXUNIT_SCALERS+4)
+#define TEXUNIT_DITHER (TEXUNIT_3DLUT+1)
+
+// scale/cscale arguments that map directly to shader filter routines.
+// Note that the convolution filters are not included in this list.
+static const char *const fixed_scale_filters[] = {
+ "bilinear",
+ "bicubic_fast",
+ "sharpen3",
+ "sharpen5",
+ "oversample",
+ "custom",
+ NULL // end-of-list sentinel
+};
+static const char *const fixed_tscale_filters[] = { // presumably the tscale counterpart of the list above - confirm
+ "oversample",
+ NULL // end-of-list sentinel
+};
+
+// must be sorted, and terminated with 0
+int filter_sizes[] =
+ {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; // trailing 0 marks the end
+int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM
+
+struct vertex_pt { // a pair of float coordinates
+ float x, y;
+};
+
+struct vertex { // one quad vertex as filled in by render_pass_quad()
+ struct vertex_pt position;
+ struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM]; // one texture coordinate per video texture unit
+};
+
+static const struct gl_vao_entry vertex_vao[] = { // attribute table for struct vertex; offsets taken with offsetof()
+ {"position", 2, GL_FLOAT, false, offsetof(struct vertex, position)},
+ {"texcoord0", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[0])},
+ {"texcoord1", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[1])},
+ {"texcoord2", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[2])},
+ {"texcoord3", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[3])},
+ {"texcoord4", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[4])},
+ {"texcoord5", 2, GL_FLOAT, false, offsetof(struct vertex, texcoord[5])},
+ {0} // all-zero terminator entry
+};
+
+struct texplane { // per-plane texture state of the current video image
+ int w, h; // plane size; set from mp_image_plane_w()/mp_image_plane_h() in init_video()
+ GLint gl_internal_format; // passed to TexImage2D() in init_video()
+ GLenum gl_target; // copied from gl_video.gl_target in init_video()
+ GLenum gl_format;
+ GLenum gl_type;
+ GLuint gl_texture; // generated in init_video() unless hwdec is active
+ int gl_buffer; // released via DeleteBuffers() in uninit_video(); presumably a PBO name - confirm
+};
+
+struct video_image {
+ struct texplane planes[4]; // the loops visible in this file touch the first gl_video.plane_count entries
+ bool image_flipped;
+ struct mp_image *mpi; // original input image
+};
+
+struct scaler { // state of one scaler slot in gl_video.scaler[]
+ int index;
+ struct scaler_config conf; // configuration last applied by reinit_scaler()
+ double scale_factor; // compared in reinit_scaler() to detect changes
+ bool initialized; // set by reinit_scaler(), cleared by uninit_scaler()
+ struct filter_kernel *kernel; // NULL after uninit_scaler()
+ GLuint gl_lut; // texture deleted in uninit_scaler()
+ GLenum gl_target;
+ struct fbotex sep_fbo; // released in uninit_scaler()
+ bool insufficient; // reset to false by reinit_scaler()
+
+ // kernel points here
+ struct filter_kernel kernel_storage;
+};
+
+struct fbosurface { // one entry of gl_video.surfaces[]
+ struct fbotex fbotex;
+ double pts; // MP_NOPTS_VALUE after gl_video_reset_surfaces()
+};
+
+#define FBOSURFACES_MAX 10 // length of gl_video.surfaces[]; also the modulus used by fbosurface_wrap()
+
+struct src_tex { // one input texture of a render pass (element type of gl_video.pass_tex[])
+ GLuint gl_tex; // 0 means the slot is unused; such slots are skipped when binding and drawing
+ GLenum gl_target;
+ int w, h; // texture size; used to normalize coordinates for non-rectangle targets
+ struct mp_rect_f src; // source rectangle sampled by render_pass_quad()
+};
+
+struct gl_video { // renderer state shared by all functions in this file
+ GL *gl;
+
+ struct mpv_global *global;
+ struct mp_log *log;
+ struct gl_video_opts opts;
+ bool gl_debug; // when set, debug_check_gl() calls glCheckError()
+
+ int depth_g;
+ int texture_16bit_depth; // actual bits available in 16 bit textures
+
+ struct gl_shader_cache *sc;
+
+ GLenum gl_target; // texture target (GL_TEXTURE_2D, ...) for video and FBOs
+
+ struct gl_vao vao;
+
+ struct osd_state *osd_state;
+ struct mpgl_osd *osd; // destroyed and recreated by recreate_osd()
+ double osd_pts;
+
+ GLuint lut_3d_texture; // created lazily in gl_video_set_lut3d()
+ bool use_lut_3d;
+
+ GLuint dither_texture; // deleted in uninit_rendering()
+ int dither_size;
+
+ struct mp_image_params real_image_params; // configured format
+ struct mp_image_params image_params; // texture format (mind hwdec case)
+ struct mp_imgfmt_desc image_desc;
+ int plane_count;
+ int image_w, image_h; // copied from image_params.w/h in init_video()
+
+ bool is_yuv, is_rgb, is_packed_yuv;
+ bool has_alpha;
+ char color_swizzle[5];
+
+ struct video_image image;
+
+ struct fbotex chroma_merge_fbo;
+ struct fbotex source_fbo;
+ struct fbotex indirect_fbo;
+ struct fbotex blend_subs_fbo;
+ struct fbosurface surfaces[FBOSURFACES_MAX];
+
+ // these are duplicated so we can keep rendering back and forth between
+ // them to support an unlimited number of shader passes per step
+ struct fbotex pre_fbo[2];
+ struct fbotex post_fbo[2];
+
+ int surface_idx; // reset to 0 by gl_video_reset_surfaces()
+ int surface_now; // reset to 0 by gl_video_reset_surfaces()
+ int frames_drawn; // reset to 0 by gl_video_reset_surfaces()
+ bool is_interpolated;
+
+ // state for luma (0), luma-down(1), chroma (2) and temporal (3) scalers
+ struct scaler scaler[4];
+
+ struct mp_csp_equalizer video_eq;
+
+ struct mp_rect src_rect; // displayed part of the source video
+ struct mp_rect dst_rect; // video rectangle on output window
+ struct mp_osd_res osd_rect; // OSD size/margins
+ int vp_w, vp_h;
+
+ // temporary during rendering
+ struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; // zeroed at the end of finish_pass_direct()
+ bool use_linear;
+ bool use_normalized_range;
+ float user_gamma;
+
+ int frames_uploaded; // exported to custom shaders as the "frame" uniform
+ int frames_rendered;
+ AVLFG lfg; // seeded in init_video(); feeds the "random" shader uniform
+
+ // Cached because computing it can take relatively long
+ int last_dither_matrix_size;
+ float *last_dither_matrix;
+
+ struct gl_hwdec *hwdec;
+ bool hwdec_active; // when set, plane textures are neither created nor deleted by this file
+};
+
+struct fmt_entry { // one row of the format tables below
+ int mp_format; // IMGFMT_* value, or 0 in the tables that are indexed by position
+ GLint internal_format;
+ GLenum format;
+ GLenum type;
+};
+
+// Very special formats, for which OpenGL happens to have direct support
+static const struct fmt_entry mp_to_gl_formats[] = {
+ {IMGFMT_BGR555, GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ {IMGFMT_BGR565, GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},
+ {IMGFMT_RGB555, GL_RGBA, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ {IMGFMT_RGB565, GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+ {0}, // terminator (mp_format == 0)
+};
+
+static const struct fmt_entry gl_byte_formats[] = { // default table; index = (n_channels - 1) + (bytes_per_comp - 1) * 4, see find_tex_format()
+ {0, GL_RED, GL_RED, GL_UNSIGNED_BYTE}, // 1 x 8
+ {0, GL_RG, GL_RG, GL_UNSIGNED_BYTE}, // 2 x 8
+ {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8
+ {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8
+ {0, GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // 1 x 16
+ {0, GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // 2 x 16
+ {0, GL_RGB16, GL_RGB, GL_UNSIGNED_SHORT}, // 3 x 16
+ {0, GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // 4 x 16
+};
+
+static const struct fmt_entry gl_byte_formats_gles3[] = { // chosen by find_tex_format() when gl->es >= 300
+ {0, GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // 1 x 8
+ {0, GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // 2 x 8
+ {0, GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8
+ {0, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8
+ // There are no filterable texture formats that can be uploaded as
+ // GL_UNSIGNED_SHORT, so apparently we're out of luck.
+ {0, 0, 0, 0}, // 1 x 16
+ {0, 0, 0, 0}, // 2 x 16
+ {0, 0, 0, 0}, // 3 x 16
+ {0, 0, 0, 0}, // 4 x 16
+};
+
+static const struct fmt_entry gl_byte_formats_gles2[] = { // chosen when gl->es is nonzero but below 300
+ {0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8
+ {0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
+ {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8
+ {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8
+ {0, 0, 0, 0}, // 1 x 16
+ {0, 0, 0, 0}, // 2 x 16
+ {0, 0, 0, 0}, // 3 x 16
+ {0, 0, 0, 0}, // 4 x 16
+};
+
+static const struct fmt_entry gl_byte_formats_legacy[] = { // chosen for non-ES contexts without MPGL_CAP_TEX_RG
+ {0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8
+ {0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
+ {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8
+ {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8
+ {0, GL_LUMINANCE16, GL_LUMINANCE, GL_UNSIGNED_SHORT},// 1 x 16
+ {0, GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, GL_UNSIGNED_SHORT},// 2 x 16
+ {0, GL_RGB16, GL_RGB, GL_UNSIGNED_SHORT},// 3 x 16
+ {0, GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},// 4 x 16
+};
+
+static const struct fmt_entry gl_float16_formats[] = { // 16-bit float internal formats, uploaded as GL_FLOAT; row n holds n+1 channels
+ {0, GL_R16F, GL_RED, GL_FLOAT}, // 1 x f
+ {0, GL_RG16F, GL_RG, GL_FLOAT}, // 2 x f
+ {0, GL_RGB16F, GL_RGB, GL_FLOAT}, // 3 x f
+ {0, GL_RGBA16F, GL_RGBA, GL_FLOAT}, // 4 x f
+};
+
+static const struct fmt_entry gl_apple_formats[] = { // packed 4:2:2 formats using the *_APPLE enums
+ {IMGFMT_UYVY, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE},
+ {IMGFMT_YUYV, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_REV_APPLE},
+ {0} // terminator (mp_format == 0)
+};
+
+struct packed_fmt_entry { // one row of mp_packed_formats[]
+ int fmt; // IMGFMT_* value; 0 terminates the table
+ int8_t component_size; // 1 or 2 in the table below; presumably bytes per component - confirm
+ int8_t components[4]; // source component - 0 means unmapped
+};
+
+static const struct packed_fmt_entry mp_packed_formats[] = {
+ // w R G B A
+ {IMGFMT_Y8, 1, {1, 0, 0, 0}},
+ {IMGFMT_Y16, 2, {1, 0, 0, 0}},
+ {IMGFMT_YA8, 1, {1, 0, 0, 2}},
+ {IMGFMT_YA16, 2, {1, 0, 0, 2}},
+ {IMGFMT_ARGB, 1, {2, 3, 4, 1}},
+ {IMGFMT_0RGB, 1, {2, 3, 4, 0}},
+ {IMGFMT_BGRA, 1, {3, 2, 1, 4}},
+ {IMGFMT_BGR0, 1, {3, 2, 1, 0}},
+ {IMGFMT_ABGR, 1, {4, 3, 2, 1}},
+ {IMGFMT_0BGR, 1, {4, 3, 2, 0}},
+ {IMGFMT_RGBA, 1, {1, 2, 3, 4}},
+ {IMGFMT_RGB0, 1, {1, 2, 3, 0}},
+ {IMGFMT_BGR24, 1, {3, 2, 1, 0}},
+ {IMGFMT_RGB24, 1, {1, 2, 3, 0}},
+ {IMGFMT_RGB48, 2, {1, 2, 3, 0}},
+ {IMGFMT_RGBA64, 2, {1, 2, 3, 4}},
+ {IMGFMT_BGRA64, 2, {3, 2, 1, 4}},
+ {0}, // terminator (fmt == 0)
+};
+
+const struct gl_video_opts gl_video_opts_def = { // referenced as gl_video_conf.defaults
+ .dither_depth = -1, // "no" in the dither-depth option choices
+ .dither_size = 6,
+ .temporal_dither_period = 1,
+ .fbo_format = GL_RGBA16,
+ .sigmoid_center = 0.75,
+ .sigmoid_slope = 6.5,
+ .scaler = {
+ {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale
+ {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale
+ {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale
+ {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale
+ },
+ .alpha_mode = 2, // "blend" in the alpha option choices
+ .background = {0, 0, 0, 255},
+ .gamma = 1.0f,
+};
+
+const struct gl_video_opts gl_video_opts_hq_def = { // alternative preset; where it is selected is not visible here
+ .dither_depth = 0, // "auto" in the dither-depth option choices
+ .dither_size = 6,
+ .temporal_dither_period = 1,
+ .fbo_format = GL_RGBA16,
+ .fancy_downscaling = 1,
+ .sigmoid_center = 0.75,
+ .sigmoid_slope = 6.5,
+ .sigmoid_upscaling = 1,
+ .scaler = {
+ {{"spline36", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale
+ {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale
+ {{"spline36", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale
+ {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale
+ },
+ .alpha_mode = 2, // "blend" in the alpha option choices
+ .background = {0, 0, 0, 255},
+ .gamma = 1.0f,
+ .blend_subs = 0,
+ .pbo = 1,
+};
+
+static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
+ struct bstr name, struct bstr param); // validator for the scale/dscale/cscale/tscale options; defined outside this view
+
+static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
+ struct bstr name, struct bstr param); // validator for the *-window options; defined outside this view
+
+#define OPT_BASE_STRUCT struct gl_video_opts
+const struct m_sub_options gl_video_conf = { // option table; scaler[] indices are 0=scale, 1=dscale, 2=cscale, 3=tscale
+ .opts = (const m_option_t[]) {
+ OPT_FLAG("dumb-mode", dumb_mode, 0),
+ OPT_FLOATRANGE("gamma", gamma, 0, 0.1, 2.0),
+ OPT_FLAG("gamma-auto", gamma_auto, 0),
+ OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
+ OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
+ OPT_FLAG("pbo", pbo, 0),
+ OPT_STRING_VALIDATE("scale", scaler[0].kernel.name, 0, validate_scaler_opt),
+ OPT_STRING_VALIDATE("dscale", scaler[1].kernel.name, 0, validate_scaler_opt),
+ OPT_STRING_VALIDATE("cscale", scaler[2].kernel.name, 0, validate_scaler_opt),
+ OPT_STRING_VALIDATE("tscale", scaler[3].kernel.name, 0, validate_scaler_opt),
+ OPT_FLOAT("scale-param1", scaler[0].kernel.params[0], 0),
+ OPT_FLOAT("scale-param2", scaler[0].kernel.params[1], 0),
+ OPT_FLOAT("dscale-param1", scaler[1].kernel.params[0], 0),
+ OPT_FLOAT("dscale-param2", scaler[1].kernel.params[1], 0),
+ OPT_FLOAT("cscale-param1", scaler[2].kernel.params[0], 0),
+ OPT_FLOAT("cscale-param2", scaler[2].kernel.params[1], 0),
+ OPT_FLOAT("tscale-param1", scaler[3].kernel.params[0], 0),
+ OPT_FLOAT("tscale-param2", scaler[3].kernel.params[1], 0),
+ OPT_FLOAT("scale-blur", scaler[0].kernel.blur, 0),
+ OPT_FLOAT("dscale-blur", scaler[1].kernel.blur, 0),
+ OPT_FLOAT("cscale-blur", scaler[2].kernel.blur, 0),
+ OPT_FLOAT("tscale-blur", scaler[3].kernel.blur, 0),
+ OPT_STRING_VALIDATE("scale-window", scaler[0].window.name, 0, validate_window_opt),
+ OPT_STRING_VALIDATE("dscale-window", scaler[1].window.name, 0, validate_window_opt),
+ OPT_STRING_VALIDATE("cscale-window", scaler[2].window.name, 0, validate_window_opt),
+ OPT_STRING_VALIDATE("tscale-window", scaler[3].window.name, 0, validate_window_opt),
+ OPT_FLOAT("scale-wparam", scaler[0].window.params[0], 0),
+ OPT_FLOAT("dscale-wparam", scaler[1].window.params[0], 0),
+ OPT_FLOAT("cscale-wparam", scaler[2].window.params[0], 0),
+ OPT_FLOAT("tscale-wparam", scaler[3].window.params[0], 0),
+ OPT_FLOATRANGE("scale-radius", scaler[0].radius, 0, 0.5, 16.0),
+ OPT_FLOATRANGE("dscale-radius", scaler[1].radius, 0, 0.5, 16.0),
+ OPT_FLOATRANGE("cscale-radius", scaler[2].radius, 0, 0.5, 16.0),
+ OPT_FLOATRANGE("tscale-radius", scaler[3].radius, 0, 0.5, 3.0), // smaller upper bound than the spatial scalers
+ OPT_FLOATRANGE("scale-antiring", scaler[0].antiring, 0, 0.0, 1.0),
+ OPT_FLOATRANGE("dscale-antiring", scaler[1].antiring, 0, 0.0, 1.0),
+ OPT_FLOATRANGE("cscale-antiring", scaler[2].antiring, 0, 0.0, 1.0),
+ OPT_FLOATRANGE("tscale-antiring", scaler[3].antiring, 0, 0.0, 1.0),
+ OPT_FLAG("tscale-clamp", scaler[3].clamp, 0),
+ OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
+ OPT_FLAG("linear-scaling", linear_scaling, 0),
+ OPT_FLAG("fancy-downscaling", fancy_downscaling, 0),
+ OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
+ OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
+ OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
+ OPT_CHOICE("fbo-format", fbo_format, 0,
+ ({"rgb", GL_RGB},
+ {"rgba", GL_RGBA},
+ {"rgb8", GL_RGB8},
+ {"rgb10", GL_RGB10},
+ {"rgb10_a2", GL_RGB10_A2},
+ {"rgb16", GL_RGB16},
+ {"rgb16f", GL_RGB16F},
+ {"rgb32f", GL_RGB32F},
+ {"rgba12", GL_RGBA12},
+ {"rgba16", GL_RGBA16},
+ {"rgba16f", GL_RGBA16F},
+ {"rgba32f", GL_RGBA32F})),
+ OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
+ ({"no", -1}, {"auto", 0})),
+ OPT_CHOICE("dither", dither_algo, 0,
+ ({"fruit", 0}, {"ordered", 1}, {"no", -1})),
+ OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
+ OPT_FLAG("temporal-dither", temporal_dither, 0),
+ OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128),
+ OPT_CHOICE("alpha", alpha_mode, 0,
+ ({"no", 0},
+ {"yes", 1},
+ {"blend", 2})),
+ OPT_FLAG("rectangle-textures", use_rectangle, 0),
+ OPT_COLOR("background", background, 0),
+ OPT_FLAG("interpolation", interpolation, 0),
+ OPT_CHOICE("blend-subtitles", blend_subs, 0,
+ ({"no", 0},
+ {"yes", 1},
+ {"video", 2})),
+ OPT_STRING("source-shader", source_shader, 0),
+ OPT_STRING("scale-shader", scale_shader, 0),
+ OPT_STRINGLIST("pre-shaders", pre_shaders, 0),
+ OPT_STRINGLIST("post-shaders", post_shaders, 0),
+
+ OPT_REMOVED("approx-gamma", "this is always enabled now"),
+ OPT_REMOVED("cscale-down", "chroma is never downscaled"),
+ OPT_REMOVED("scale-sep", "this is set automatically whenever sane"),
+ OPT_REMOVED("indirect", "this is set automatically whenever sane"),
+ OPT_REMOVED("srgb", "use target-prim=bt709:target-trc=srgb instead"),
+
+ OPT_REPLACED("lscale", "scale"),
+ OPT_REPLACED("lscale-down", "scale-down"), // NOTE(review): "scale-down" is itself replaced by "dscale" further down
+ OPT_REPLACED("lparam1", "scale-param1"),
+ OPT_REPLACED("lparam2", "scale-param2"),
+ OPT_REPLACED("lradius", "scale-radius"),
+ OPT_REPLACED("lantiring", "scale-antiring"),
+ OPT_REPLACED("cparam1", "cscale-param1"),
+ OPT_REPLACED("cparam2", "cscale-param2"),
+ OPT_REPLACED("cradius", "cscale-radius"),
+ OPT_REPLACED("cantiring", "cscale-antiring"),
+ OPT_REPLACED("smoothmotion", "interpolation"),
+ OPT_REPLACED("smoothmotion-threshold", "tscale-param1"),
+ OPT_REPLACED("scale-down", "dscale"),
+
+ {0} // all-zero terminator entry
+ },
+ .size = sizeof(struct gl_video_opts),
+ .defaults = &gl_video_opts_def,
+};
+
+static void uninit_rendering(struct gl_video *p); // forward declaration: called by reinit_rendering() before its definition
+static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
+static void check_gl_features(struct gl_video *p);
+static bool init_format(int fmt, struct gl_video *init);
+static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
+static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src);
+
+#define GLSL(x) gl_sc_add(p->sc, #x "\n"); // stringifies x and appends it plus a newline; needs a `p` in scope; expansion already ends in ';'
+#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) // printf-style variant; needs a `p` in scope
+
+// Return the upload format entry for a texture with the given component
+// width (1 or 2 bytes) and channel count (1 to 4). Which table is consulted
+// depends on the context: GLES 3+, older GLES, desktop GL with RG texture
+// support, or desktop GL without it. Some tables contain all-zero rows for
+// combinations they cannot provide.
+static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
+                                               int n_channels)
+{
+    assert(bytes_per_comp == 1 || bytes_per_comp == 2);
+    assert(n_channels >= 1 && n_channels <= 4);
+
+    const struct fmt_entry *table;
+    if (gl->es >= 300)
+        table = gl_byte_formats_gles3;
+    else if (gl->es)
+        table = gl_byte_formats_gles2;
+    else if (gl->mpgl_caps & MPGL_CAP_TEX_RG)
+        table = gl_byte_formats;
+    else
+        table = gl_byte_formats_legacy;
+
+    // Every table lists the four 8-bit rows first, then the four 16-bit rows.
+    int width_group = (bytes_per_comp - 1) * 4;
+    int channel_idx = n_channels - 1;
+    return table + width_group + channel_idx;
+}
+
+// Run glCheckError() with the given message, but only while GL debugging is
+// enabled on this renderer.
+static void debug_check_gl(struct gl_video *p, const char *msg)
+{
+    if (!p->gl_debug)
+        return;
+    glCheckError(p->gl, p->log, msg);
+}
+
+void gl_video_set_debug(struct gl_video *p, bool enable)
+{
+ GL *gl = p->gl;
+
+ p->gl_debug = enable;
+ if (p->gl->debug_context)
+ gl_set_debug_logger(gl, enable ? p->log : NULL);
+}
+
+// Forget all queued surfaces: every slot loses its timestamp and the surface
+// indices and drawn-frame counter go back to zero.
+static void gl_video_reset_surfaces(struct gl_video *p)
+{
+    struct fbosurface *surf = p->surfaces;
+    struct fbosurface *end = surf + FBOSURFACES_MAX;
+    while (surf < end) {
+        surf->pts = MP_NOPTS_VALUE;
+        surf++;
+    }
+
+    p->surface_idx = 0;
+    p->surface_now = 0;
+    p->frames_drawn = 0;
+}
+
+// Map any (possibly negative) surface id into the range
+// [0, FBOSURFACES_MAX).
+static inline int fbosurface_wrap(int id)
+{
+    int wrapped = id % FBOSURFACES_MAX;
+    // C's % keeps the sign of the dividend, so shift negatives back up.
+    if (wrapped < 0)
+        wrapped += FBOSURFACES_MAX;
+    return wrapped;
+}
+
+// Destroy the current OSD renderer and, if an OSD state is attached, build a
+// new one configured with the current pbo option.
+static void recreate_osd(struct gl_video *p)
+{
+    mpgl_osd_destroy(p->osd);
+    p->osd = NULL;
+
+    if (!p->osd_state)
+        return;
+
+    p->osd = mpgl_osd_init(p->gl, p->log, p->osd_state);
+    mpgl_osd_set_options(p->osd, p->opts.pbo);
+}
+
+static void reinit_rendering(struct gl_video *p) // drops all render-side GL state, then recreates the OSD renderer
+{
+ MP_VERBOSE(p, "Reinit rendering.\n");
+
+ debug_check_gl(p, "before scaler initialization");
+
+ uninit_rendering(p); // releases scalers, the dither texture and all FBOs, and resets the surface queue
+
+ recreate_osd(p); // a new OSD renderer is created only if p->osd_state is set
+}
+
+// Release every GL object owned by the rendering pipeline (scalers, dither
+// texture, intermediate FBOs, interpolation surfaces) and reset the surface
+// queue. The video plane textures themselves are not touched here.
+static void uninit_rendering(struct gl_video *p)
+{
+    GL *gl = p->gl;
+
+    struct scaler *sc = p->scaler;
+    for (int i = 0; i < 4; i++, sc++)
+        uninit_scaler(p, sc);
+
+    gl->DeleteTextures(1, &p->dither_texture);
+    p->dither_texture = 0;
+
+    struct fbotex *single_fbos[] = {
+        &p->chroma_merge_fbo,
+        &p->source_fbo,
+        &p->indirect_fbo,
+        &p->blend_subs_fbo,
+    };
+    for (int i = 0; i < 4; i++)
+        fbotex_uninit(single_fbos[i]);
+
+    // The ping-pong pairs are released pre/post interleaved.
+    for (int i = 0; i < 2; i++) {
+        fbotex_uninit(&p->pre_fbo[i]);
+        fbotex_uninit(&p->post_fbo[i]);
+    }
+
+    for (int i = 0; i < FBOSURFACES_MAX; i++)
+        fbotex_uninit(&p->surfaces[i].fbotex);
+
+    gl_video_reset_surfaces(p);
+}
+
+// Install or remove the 3D LUT.
+// lut3d == NULL disables the LUT (and reinitializes rendering if it was on).
+// Otherwise the LUT data is uploaded as a GL_RGB16 3D texture on the
+// dedicated texture unit and rendering is reinitialized. Nothing happens if
+// the context lacks MPGL_CAP_3D_TEX.
+void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
+{
+    GL *gl = p->gl;
+
+    if (!lut3d) {
+        if (!p->use_lut_3d)
+            return;
+        p->use_lut_3d = false;
+        reinit_rendering(p);
+        return;
+    }
+
+    if (!(gl->mpgl_caps & MPGL_CAP_3D_TEX))
+        return;
+
+    if (!p->lut_3d_texture)
+        gl->GenTextures(1, &p->lut_3d_texture);
+
+    gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_3DLUT);
+    gl->BindTexture(GL_TEXTURE_3D, p->lut_3d_texture);
+    gl->TexImage3D(GL_TEXTURE_3D, 0, GL_RGB16,
+                   lut3d->size[0], lut3d->size[1], lut3d->size[2], 0,
+                   GL_RGB, GL_UNSIGNED_SHORT, lut3d->data);
+
+    // Linear filtering, clamped on all three axes.
+    static const struct {
+        GLenum pname;
+        GLint value;
+    } tex_params[] = {
+        {GL_TEXTURE_MIN_FILTER, GL_LINEAR},
+        {GL_TEXTURE_MAG_FILTER, GL_LINEAR},
+        {GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE},
+        {GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE},
+        {GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE},
+    };
+    for (int i = 0; i < 5; i++)
+        gl->TexParameteri(GL_TEXTURE_3D, tex_params[i].pname, tex_params[i].value);
+
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    p->use_lut_3d = true;
+    check_gl_features(p);
+
+    debug_check_gl(p, "after 3d lut creation");
+
+    reinit_rendering(p);
+}
+
+// Make the texture of src_fbo the input for pass texture slot `id`. Only the
+// region (0,0)-(w,h) of the FBO is used as the source rectangle; the FBO
+// itself may be larger.
+static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo,
+                             int w, int h, int id)
+{
+    struct mp_rect_f region = {0, 0, w, h};
+    struct src_tex input = {
+        .gl_tex = src_fbo->texture,
+        .gl_target = GL_TEXTURE_2D,
+        .w = src_fbo->w,
+        .h = src_fbo->h,
+        .src = region,
+    };
+    p->pass_tex[id] = input;
+}
+
+// Bind the planes of vimg as the inputs of the next pass and compute the
+// transform that maps luma texture coordinates onto the chroma planes.
+// The result is written to *chroma: t[] carries the chroma siting offset,
+// the diagonal of m[][] the luma/chroma size correction.
+static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg,
+                                    struct gl_transform *chroma)
+{
+    *chroma = (struct gl_transform){{{0}}};
+
+    assert(vimg->mpi);
+
+    // Size of one luma sample expressed in chroma coordinates.
+    float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
+    float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
+
+    // Texture coordinates center chroma by default. For any other siting,
+    // shift so that luma and chroma samples line up exactly. Without
+    // subsampling on an axis (sample size 1) the location has no effect.
+    if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
+        int cx, cy;
+        mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
+        if (ls_w < 1)
+            chroma->t[0] = ls_w * -cx / 2;
+        else
+            chroma->t[0] = 0;
+        if (ls_h < 1)
+            chroma->t[1] = ls_h * -cy / 2;
+        else
+            chroma->t[1] = 0;
+    }
+
+    // Keep luma and chroma sizes aligned when the luma size is not a
+    // multiple of the subsampling factor, e.g. 4:2:0 at 3x3 has a 2x2
+    // chroma plane, so luma (3,3) must map to chroma (2,2).
+    const struct texplane *luma = &vimg->planes[0];
+    const struct texplane *chr = &vimg->planes[1];
+    chroma->m[0][0] = ls_w * (float)luma->w / chr->w;
+    chroma->m[1][1] = ls_h * (float)luma->h / chr->h;
+
+    for (int i = 0; i < p->plane_count; i++) {
+        const struct texplane *tp = &vimg->planes[i];
+        struct mp_rect_f whole = {0, 0, tp->w, tp->h};
+        p->pass_tex[i] = (struct src_tex){
+            .gl_tex = tp->gl_texture,
+            .gl_target = tp->gl_target,
+            .w = tp->w,
+            .h = tp->h,
+            .src = whole,
+        };
+    }
+}
+
+// Set up everything that depends on the video format in p->image_params:
+// format description, texture target, equalizer capabilities, and (unless a
+// hardware decoder supplies the textures) one GL texture per plane. Ends by
+// reinitializing the rendering state.
+static void init_video(struct gl_video *p)
+{
+    GL *gl = p->gl;
+
+    check_gl_features(p);
+
+    init_format(p->image_params.imgfmt, p);
+    if (p->opts.use_rectangle)
+        p->gl_target = GL_TEXTURE_RECTANGLE;
+    else
+        p->gl_target = GL_TEXTURE_2D;
+
+    // The hwdec driver may change image_params, so the format is
+    // initialized a second time and the driver's texture target is used.
+    if (p->hwdec_active) {
+        if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0)
+            MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
+        init_format(p->image_params.imgfmt, p);
+        p->gl_target = p->hwdec->gl_texture_target;
+    }
+
+    mp_image_params_guess_csp(&p->image_params);
+
+    p->image_w = p->image_params.w;
+    p->image_h = p->image_params.h;
+
+    int caps = MP_CSP_EQ_CAPS_GAMMA;
+    bool yuv_matrix = p->is_yuv &&
+                      p->image_params.colorspace != MP_CSP_BT_2020_C;
+    if (yuv_matrix)
+        caps |= MP_CSP_EQ_CAPS_COLORMATRIX;
+    if (p->image_desc.flags & MP_IMGFLAG_XYZ)
+        caps |= MP_CSP_EQ_CAPS_BRIGHTNESS;
+    p->video_eq.capabilities = caps;
+
+    av_lfg_init(&p->lfg, 1);
+
+    debug_check_gl(p, "before video texture creation");
+
+    // Dummy image used only to query per-plane dimensions.
+    struct mp_image layout = {0};
+    mp_image_set_params(&layout, &p->image_params);
+
+    struct video_image *img = &p->image;
+    for (int i = 0; i < p->plane_count; i++) {
+        struct texplane *tp = &img->planes[i];
+
+        tp->gl_target = p->gl_target;
+        tp->w = mp_image_plane_w(&layout, i);
+        tp->h = mp_image_plane_h(&layout, i);
+
+        if (!p->hwdec_active) {
+            GLenum target = p->gl_target;
+
+            gl->ActiveTexture(GL_TEXTURE0 + i);
+            gl->GenTextures(1, &tp->gl_texture);
+            gl->BindTexture(target, tp->gl_texture);
+
+            gl->TexImage2D(target, 0, tp->gl_internal_format,
+                           tp->w, tp->h, 0,
+                           tp->gl_format, tp->gl_type, NULL);
+
+            gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+            gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+            gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+            gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        }
+
+        MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", i, tp->w, tp->h);
+    }
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    debug_check_gl(p, "after video texture creation");
+
+    reinit_rendering(p);
+}
+
+// Undo init_video(): drop the rendering state, the per-plane textures and
+// buffers, and the reference to the current image. The image parameters are
+// cleared afterwards.
+static void uninit_video(struct gl_video *p)
+{
+    GL *gl = p->gl;
+
+    uninit_rendering(p);
+
+    struct video_image *img = &p->image;
+    for (int i = 0; i < p->plane_count; i++) {
+        struct texplane *tp = &img->planes[i];
+
+        // With hwdec active the texture name is only forgotten, not deleted.
+        if (!p->hwdec_active)
+            gl->DeleteTextures(1, &tp->gl_texture);
+        tp->gl_texture = 0;
+
+        gl->DeleteBuffers(1, &tp->gl_buffer);
+        tp->gl_buffer = 0;
+    }
+    mp_image_unrefp(&img->mpi);
+
+    // Zeroed params make sure gl_video_config() runs init_video() again on
+    // a gl_video that is no longer initialized.
+    p->image_params = p->real_image_params = (struct mp_image_params){0};
+}
+
+// For every pass texture slot in use, publish the "textureN" sampler and
+// "texture_sizeN" uniforms to the shader cache and bind the texture to
+// texture unit N. Rectangle textures report a size of (1, 1).
+static void pass_prepare_src_tex(struct gl_video *p)
+{
+    GL *gl = p->gl;
+    struct gl_shader_cache *sc = p->sc;
+
+    for (int unit = 0; unit < TEXUNIT_VIDEO_NUM; unit++) {
+        const struct src_tex *tex = &p->pass_tex[unit];
+        if (tex->gl_tex) {
+            char sampler_name[32];
+            char size_name[32];
+            snprintf(sampler_name, sizeof(sampler_name), "texture%d", unit);
+            snprintf(size_name, sizeof(size_name), "texture_size%d", unit);
+
+            gl_sc_uniform_sampler(sc, sampler_name, tex->gl_target, unit);
+
+            bool rect = tex->gl_target == GL_TEXTURE_RECTANGLE;
+            float dims[2] = {rect ? 1 : tex->w, rect ? 1 : tex->h};
+            gl_sc_uniform_vec2(sc, size_name, dims);
+
+            gl->ActiveTexture(GL_TEXTURE0 + unit);
+            gl->BindTexture(tex->gl_target, tex->gl_tex);
+        }
+    }
+    gl->ActiveTexture(GL_TEXTURE0);
+}
+
+// Draw one quad covering dst inside a vp_w x vp_h viewport, with texture
+// coordinates derived from each active pass texture's source rectangle.
+// flags: bits 0-1 = number of rotation steps applied to the texture
+//        coordinates, bit 2 = flip vertically
+static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
+                             const struct mp_rect *dst, int flags)
+{
+    struct gl_transform ortho;
+    gl_transform_ortho(&ortho, 0, vp_w, 0, vp_h);
+
+    float px[2] = {dst->x0, dst->x1};
+    float py[2] = {dst->y0, dst->y1};
+    gl_transform_vec(ortho, &px[0], &py[0]);
+    gl_transform_vec(ortho, &px[1], &py[1]);
+
+    const bool flip = flags & 4;
+
+    struct vertex quad[4];
+    for (int corner = 0; corner < 4; corner++) {
+        // Corner order: x index is the high bit, y index the low bit.
+        int ix = corner / 2;
+        int iy = corner % 2;
+
+        quad[corner].position.x = px[ix];
+        quad[corner].position.y = py[iy];
+
+        for (int unit = 0; unit < TEXUNIT_VIDEO_NUM; unit++) {
+            const struct src_tex *s = &p->pass_tex[unit];
+            if (!s->gl_tex)
+                continue;
+
+            float tx[2] = {s->src.x0, s->src.x1};
+            float ty[2] = {flip ? s->src.y1 : s->src.y0,
+                           flip ? s->src.y0 : s->src.y1};
+
+            // Rectangle textures take unnormalized coordinates.
+            bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
+            float div_w = rect ? 1 : s->w;
+            float div_h = rect ? 1 : s->h;
+            quad[corner].texcoord[unit].x = tx[ix] / div_w;
+            quad[corner].texcoord[unit].y = ty[iy] / div_h;
+        }
+    }
+
+    // Each rotation step permutes the texture coordinates between corners.
+    for (int step = flags & 3; step > 0; step--) {
+        static const int perm[4] = {1, 3, 0, 2};
+        struct vertex prev[4];
+        memcpy(prev, quad, sizeof(prev));
+        for (int corner = 0; corner < 4; corner++) {
+            memcpy(quad[corner].texcoord, prev[perm[corner]].texcoord,
+                   sizeof(quad[corner].texcoord));
+        }
+    }
+
+    p->gl->Viewport(0, 0, vp_w, abs(vp_h));
+    gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, quad, 4);
+
+    debug_check_gl(p, "after rendering");
+}
+
+// Finish the shader pass built so far by rendering it straight into `fbo`:
+// bind the inputs, generate the shader, draw the quad, then unbind the
+// framebuffer and clear all pass texture slots.
+// flags: see render_pass_quad
+static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h,
+                               const struct mp_rect *dst, int flags)
+{
+    pass_prepare_src_tex(p);
+
+    p->gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+    gl_sc_gen_shader_and_reset(p->sc);
+    render_pass_quad(p, vp_w, vp_h, dst, flags);
+    p->gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+    memset(&p->pass_tex, 0, sizeof(p->pass_tex));
+}
+
+// Finish the current pass by rendering into an FBO and load the result back
+// as an input texture for the next pass.
+// dst_fbo: render target; it may be reallocated if the required parameters
+//          have changed
+// w, h: required FBO target dimension; also the rectangle that is rasterized
+// tex: the texture unit to load the result back into
+// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (with the fuzzy
+//        flags the FBO is allowed to be larger than w/h)
+static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
+                            int w, int h, int tex, int flags)
+{
+    fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
+
+    // The viewport spans the whole FBO, the quad only the requested w x h.
+    struct mp_rect target = {0, 0, w, h};
+    finish_pass_direct(p, dst_fbo->fbo, dst_fbo->w, dst_fbo->h, &target, 0);
+
+    pass_load_fbotex(p, dst_fbo, w, h, tex);
+}
+
+// Free the GL resources of one scaler (separation FBO and LUT texture) and
+// mark it as uninitialized.
+static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
+{
+    fbotex_uninit(&scaler->sep_fbo);
+
+    p->gl->DeleteTextures(1, &scaler->gl_lut);
+    scaler->gl_lut = 0;
+
+    scaler->initialized = false;
+    scaler->kernel = NULL;
+}
+
+// Add a user shader body to the shader header and provide the uniforms it
+// may use: "random" (fresh value from the LFG scaled by UINT32_MAX),
+// "frame" (uploaded frame counter) and "image_size".
+static void load_shader(struct gl_video *p, const char *body)
+{
+    gl_sc_hadd(p->sc, body);
+
+    double rnd = (double)av_lfg_get(&p->lfg) / UINT32_MAX;
+    gl_sc_uniform_f(p->sc, "random", rnd);
+
+    gl_sc_uniform_f(p->sc, "frame", p->frames_uploaded);
+
+    GLfloat size[] = {p->image_w, p->image_h};
+    gl_sc_uniform_vec2(p->sc, "image_size", size);
+}
+
+// Run each shader in the NULL-terminated list `shaders` as its own pass.
+// Before every shader the current pass is flushed into one of the two
+// FBOs in `textures`, which are used alternately as intermediate buffers.
+// Shaders whose file cannot be loaded are skipped.
+// Returns true if at least one shader was applied.
+static bool apply_shaders(struct gl_video *p, char **shaders,
+                          struct fbotex textures[2], int tex_num, int w, int h)
+{
+    if (!shaders)
+        return false;
+
+    bool applied = false;
+    int cur = 0;
+    for (char **it = shaders; *it; it++) {
+        const char *body = gl_sc_loadfile(p->sc, *it);
+        if (body) {
+            finish_pass_fbo(p, &textures[cur], w, h, tex_num, 0);
+            load_shader(p, body);
+            GLSLF("// custom shader\n");
+            GLSLF("vec4 color = sample(texture%d, texcoord%d, texture_size%d);\n",
+                  tex_num, tex_num, tex_num);
+            cur ^= 1; // switch to the other buffer
+            applied = true;
+        }
+    }
+    return applied;
+}
+
+// Semantic equality: like ==, except that two NaNs also count as equal.
+static bool double_seq(double a, double b)
+{
+    if (a == b)
+        return true;
+    return isnan(a) && isnan(b);
+}
+
+// Compare two scaler functions: the names must both be NULL or be equal
+// strings, both params must be semantically equal (NaN matches NaN), and
+// the blur values must be equal.
+static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b)
+{
+    bool same_name;
+    if (a.name && b.name)
+        same_name = strcmp(a.name, b.name) == 0;
+    else
+        same_name = !a.name && !b.name;
+
+    if (!same_name)
+        return false;
+    if (!double_seq(a.params[0], b.params[0]))
+        return false;
+    if (!double_seq(a.params[1], b.params[1]))
+        return false;
+    return a.blur == b.blur;
+}
+
+// Compare two scaler configurations by kernel, window, radius and clamp.
+// Antiring is left out on purpose: it has no influence on LUT generation.
+static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b)
+{
+    if (!scaler_fun_eq(a.kernel, b.kernel))
+        return false;
+    if (!scaler_fun_eq(a.window, b.window))
+        return false;
+    if (a.radius != b.radius)
+        return false;
+    return a.clamp == b.clamp;
+}
+
+static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
+ const struct scaler_config *conf,
+ double scale_factor,
+ int sizes[])
+{
+ GL *gl = p->gl;
+
+ if (scaler_conf_eq(scaler->conf, *conf) &&
+ scaler->scale_factor == scale_factor &&
+ scaler->initialized)
+ return;
+
+ uninit_scaler(p, scaler);
+
+ scaler->conf = *conf;
+ scaler->scale_factor = scale_factor;
+ scaler->insufficient = false;
+ scaler->initialized = true;
+
+ const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name);
+ if (!t_kernel)
+ return;
+
+ scaler->kernel_storage = *t_kernel;
+ scaler->kernel = &scaler->kernel_storage;
+
+ const char *win = conf->window.name;
+ if (!win || !win[0])
+ win = t_kernel->window; // fall back to the scaler's default window
+ const struct filter_window *t_window = mp_find_filter_window(win);
+ if (t_window)
+ scaler->kernel->w = *t_window;
+
+ for (int n = 0; n < 2; n++) {
+ if (!isnan(conf->kernel.params[n]))
+ scaler->kernel->f.params[n] = conf->kernel.params[n];