summary refs log tree commit diff stats
path: root/video/out/opengl/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- video/out/opengl/video.c 1109
1 files changed, 621 insertions, 488 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index c10e16fe41..8807b65005 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -106,21 +106,38 @@ struct video_image {
struct mp_image *mpi; // original input image
};
-struct fbosurface {
- struct fbotex fbotex;
- double pts;
+enum plane_type {
+ PLANE_NONE = 0,
+ PLANE_RGB,
+ PLANE_LUMA,
+ PLANE_CHROMA,
+ PLANE_ALPHA,
+ PLANE_XYZ,
};
-#define FBOSURFACES_MAX 10
-
-struct src_tex {
+// A self-contained description of a source image which can be bound to a
+// texture unit and sampled from. Contains metadata about how it's to be used.
+struct img_tex {
+ enum plane_type type; // must be set to something non-zero
+ int components; // number of relevant coordinates
+ float multiplier; // multiplier to be used when sampling
GLuint gl_tex;
GLenum gl_target;
bool use_integer;
- int w, h;
- struct mp_rect_f src;
+ int tex_w, tex_h; // source texture size
+ int w, h; // logical size (with pre_transform applied)
+ struct gl_transform pre_transform; // source texture space
+ struct gl_transform transform; // rendering transformation
+ bool texture_la; // it's a GL_LUMINANCE_ALPHA texture (access with .ra not .rg)
};
+struct fbosurface {
+ struct fbotex fbotex;
+ double pts;
+};
+
+#define FBOSURFACES_MAX 10
+
struct cached_file {
char *path;
char *body;
@@ -132,6 +149,7 @@ struct gl_video {
struct mpv_global *global;
struct mp_log *log;
struct gl_video_opts opts;
+ struct gl_lcms *cms;
bool gl_debug;
int texture_16bit_depth; // actual bits available in 16 bit textures
@@ -169,15 +187,15 @@ struct gl_video {
bool dumb_mode;
bool forced_dumb_mode;
- struct fbotex chroma_merge_fbo;
- struct fbotex chroma_deband_fbo;
+ struct fbotex merge_fbo[4];
+ struct fbotex deband_fbo[4];
+ struct fbotex scale_fbo[4];
+ struct fbotex integer_fbo[4];
struct fbotex indirect_fbo;
struct fbotex blend_subs_fbo;
struct fbotex unsharp_fbo;
struct fbotex output_fbo;
- struct fbotex deband_fbo;
struct fbosurface surfaces[FBOSURFACES_MAX];
- struct fbotex integer_conv_fbo[TEXUNIT_VIDEO_NUM];
// these are duplicated so we can keep rendering back and forth between
// them to support an unlimited number of shader passes per step
@@ -192,8 +210,8 @@ struct gl_video {
bool is_interpolated;
bool output_fbo_valid;
- // state for luma (0), luma-down(1), chroma (2) and temporal (3) scalers
- struct scaler scaler[4];
+ // state for configured scalers
+ struct scaler scaler[SCALER_COUNT];
struct mp_csp_equalizer video_eq;
@@ -203,11 +221,12 @@ struct gl_video {
int vp_w, vp_h;
// temporary during rendering
- struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ int pass_tex_num;
int texture_w, texture_h;
struct gl_transform texture_offset; // texture transform without rotation
+ int components;
bool use_linear;
- bool use_normalized_range;
float user_gamma;
int frames_uploaded;
@@ -418,10 +437,10 @@ const struct m_sub_options gl_video_conf = {
OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
OPT_FLAG("pbo", pbo, 0),
- SCALER_OPTS("scale", 0),
- SCALER_OPTS("dscale", 1),
- SCALER_OPTS("cscale", 2),
- SCALER_OPTS("tscale", 3),
+ SCALER_OPTS("scale", SCALER_SCALE),
+ SCALER_OPTS("dscale", SCALER_DSCALE),
+ SCALER_OPTS("cscale", SCALER_CSCALE),
+ SCALER_OPTS("tscale", SCALER_TSCALE),
OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
OPT_FLAG("linear-scaling", linear_scaling, 0),
@@ -470,7 +489,7 @@ const struct m_sub_options gl_video_conf = {
OPT_FLAG("deband", deband, 0),
OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
OPT_FLOAT("sharpen", unsharp, 0),
- OPT_CHOICE("prescale", prescale, 0,
+ OPT_CHOICE("prescale-luma", prescale_luma, 0,
({"none", 0},
{"superxbr", 1}
#if HAVE_NNEDI
@@ -505,6 +524,7 @@ const struct m_sub_options gl_video_conf = {
OPT_REPLACED("smoothmotion-threshold", "tscale-param1"),
OPT_REPLACED("scale-down", "dscale"),
OPT_REPLACED("fancy-downscaling", "correct-downscaling"),
+ OPT_REPLACED("prescale", "prescale-luma"),
{0}
},
@@ -518,7 +538,7 @@ static void check_gl_features(struct gl_video *p);
static bool init_format(int fmt, struct gl_video *init);
static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src);
-static void get_scale_factors(struct gl_video *p, double xy[2]);
+static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
@@ -639,7 +659,7 @@ static void uninit_rendering(struct gl_video *p)
{
GL *gl = p->gl;
- for (int n = 0; n < 4; n++)
+ for (int n = 0; n < SCALER_COUNT; n++)
uninit_scaler(p, &p->scaler[n]);
gl->DeleteTextures(1, &p->dither_texture);
@@ -648,15 +668,16 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteBuffers(1, &p->nnedi3_weights_buffer);
p->nnedi3_weights_buffer = 0;
- fbotex_uninit(&p->chroma_merge_fbo);
- fbotex_uninit(&p->chroma_deband_fbo);
+ for (int n = 0; n < 4; n++) {
+ fbotex_uninit(&p->merge_fbo[n]);
+ fbotex_uninit(&p->deband_fbo[n]);
+ fbotex_uninit(&p->scale_fbo[n]);
+ fbotex_uninit(&p->integer_fbo[n]);
+ }
+
fbotex_uninit(&p->indirect_fbo);
fbotex_uninit(&p->blend_subs_fbo);
fbotex_uninit(&p->unsharp_fbo);
- fbotex_uninit(&p->deband_fbo);
-
- for (int n = 0; n < 4; n++)
- fbotex_uninit(&p->integer_conv_fbo[n]);
for (int n = 0; n < 2; n++) {
fbotex_uninit(&p->pre_fbo[n]);
@@ -674,21 +695,31 @@ static void uninit_rendering(struct gl_video *p)
gl_video_reset_surfaces(p);
}
-void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
+void gl_video_update_profile(struct gl_video *p)
+{
+ if (p->use_lut_3d)
+ return;
+
+ p->use_lut_3d = true;
+ check_gl_features(p);
+
+ reinit_rendering(p);
+}
+
+static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
+ enum mp_csp_trc trc)
{
GL *gl = p->gl;
- if (!lut3d) {
- if (p->use_lut_3d) {
- p->use_lut_3d = false;
- reinit_rendering(p);
- }
- return;
- }
+ if (!p->cms || !p->use_lut_3d)
+ return false;
- if (!(gl->mpgl_caps & MPGL_CAP_3D_TEX) || gl->es) {
- MP_ERR(p, "16 bit fixed point 3D textures not available.\n");
- return;
+ if (!gl_lcms_has_changed(p->cms, prim, trc))
+ return true;
+
+ struct lut3d *lut3d = NULL;
+ if (!gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc) || !lut3d) {
+ return false;
}
if (!p->lut_3d_texture)
@@ -705,33 +736,76 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
gl->ActiveTexture(GL_TEXTURE0);
- p->use_lut_3d = true;
- check_gl_features(p);
-
debug_check_gl(p, "after 3d lut creation");
- reinit_rendering(p);
+ return true;
}
-static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo,
- int w, int h, int id)
+// Fill an img_tex struct from an FBO + some metadata
+static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t,
+ enum plane_type type, int components)
{
- p->pass_tex[id] = (struct src_tex){
- .gl_tex = src_fbo->texture,
+ assert(type != PLANE_NONE);
+ return (struct img_tex){
+ .type = type,
+ .gl_tex = fbo->texture,
.gl_target = GL_TEXTURE_2D,
- .w = src_fbo->w,
- .h = src_fbo->h,
- .src = {0, 0, w, h},
+ .multiplier = 1.0,
+ .use_integer = false,
+ .tex_w = fbo->rw,
+ .tex_h = fbo->rh,
+ .w = fbo->lw,
+ .h = fbo->lh,
+ .pre_transform = identity_trans,
+ .transform = t,
+ .components = components,
};
}
-static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg,
- struct gl_transform *chroma)
+// Bind an img_tex to a free texture unit and return its ID. At most
+// TEXUNIT_VIDEO_NUM texture units can be bound at once.
+static int pass_bind(struct gl_video *p, struct img_tex tex)
+{
+ assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM);
+ p->pass_tex[p->pass_tex_num] = tex;
+ return p->pass_tex_num++;
+}
+
+// Rotation by 90° and flipping.
+static void get_plane_source_transform(struct gl_video *p, int w, int h,
+ struct gl_transform *out_tr)
{
- *chroma = (struct gl_transform){{{0}}};
+ struct gl_transform tr = identity_trans;
+ int a = p->image_params.rotate % 90 ? 0 : p->image_params.rotate / 90;
+ int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc.
+ int cos90[4] = {1, 0, -1, 0};
+ struct gl_transform rot = {{{cos90[a], sin90[a]}, {-sin90[a], cos90[a]}}};
+ gl_transform_trans(rot, &tr);
+
+ // basically, recenter to keep the whole image in view
+ float b[2] = {1, 1};
+ gl_transform_vec(rot, &b[0], &b[1]);
+ tr.t[0] += b[0] < 0 ? w : 0;
+ tr.t[1] += b[1] < 0 ? h : 0;
+ if (p->image.image_flipped) {
+ struct gl_transform flip = {{{1, 0}, {0, -1}}, {0, h}};
+ gl_transform_trans(flip, &tr);
+ }
+
+ *out_tr = tr;
+}
+
+// Places a video_image's image textures + associated metadata into tex[]. The
+// number of textures is equal to p->plane_count.
+static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
+ struct img_tex tex[4])
+{
assert(vimg->mpi);
+ // Determine the chroma offset
+ struct gl_transform chroma = (struct gl_transform){{{0}}};
+
float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
@@ -743,26 +817,56 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
// so that the luma and chroma sample line up exactly.
// For 4:4:4, setting chroma location should have no effect at all.
// luma sample size (in chroma coord. space)
- chroma->t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
- chroma->t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+ chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
}
// Make sure luma/chroma sizes are aligned.
// Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2
// so luma (3,3) has to align with chroma (2,2).
- chroma->m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w;
- chroma->m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h;
+ chroma.m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w;
+ chroma.m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h;
+ // The existing code assumes we just have a single tex multiplier for
+ // all of the planes. This may change in the future
+ float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.colorspace,
+ p->image_desc.component_bits,
+ p->image_desc.component_full_bits);
+
+ memset(tex, 0, 4 * sizeof(tex[0]));
for (int n = 0; n < p->plane_count; n++) {
struct texplane *t = &vimg->planes[n];
- p->pass_tex[n] = (struct src_tex){
+
+ enum plane_type type;
+ if (n >= 3) {
+ type = PLANE_ALPHA;
+ } else if (p->image_desc.flags & MP_IMGFLAG_RGB) {
+ type = PLANE_RGB;
+ } else if (p->image_desc.flags & MP_IMGFLAG_YUV) {
+ type = n == 0 ? PLANE_LUMA : PLANE_CHROMA;
+ } else if (p->image_desc.flags & MP_IMGFLAG_XYZ) {
+ type = PLANE_XYZ;
+ } else {
+ abort();
+ }
+
+ tex[n] = (struct img_tex){
+ .type = type,
.gl_tex = t->gl_texture,
.gl_target = t->gl_target,
+ .multiplier = tex_mul,
.use_integer = t->use_integer,
+ .tex_w = t->w,
+ .tex_h = t->h,
.w = t->w,
.h = t->h,
- .src = {0, 0, t->w, t->h},
+ .transform = type == PLANE_CHROMA ? chroma : identity_trans,
+ .components = p->image_desc.components[n],
+ .texture_la = t->gl_format == GL_LUMINANCE_ALPHA,
};
+ get_plane_source_transform(p, t->w, t->h, &tex[n].pre_transform);
+ if (p->image_params.rotate % 180 == 90)
+ MPSWAP(int, tex[n].w, tex[n].h);
}
}
@@ -864,8 +968,8 @@ static void pass_prepare_src_tex(struct gl_video *p)
GL *gl = p->gl;
struct gl_shader_cache *sc = p->sc;
- for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
- struct src_tex *s = &p->pass_tex[n];
+ for (int n = 0; n < p->pass_tex_num; n++) {
+ struct img_tex *s = &p->pass_tex[n];
if (!s->gl_tex)
continue;
@@ -883,8 +987,8 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
float f[2] = {1, 1};
if (s->gl_target != GL_TEXTURE_RECTANGLE) {
- f[0] = s->w;
- f[1] = s->h;
+ f[0] = s->tex_w;
+ f[1] = s->tex_h;
}
gl_sc_uniform_vec2(sc, texture_size, f);
gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
@@ -896,11 +1000,10 @@ static void pass_prepare_src_tex(struct gl_video *p)
gl->ActiveTexture(GL_TEXTURE0);
}
-// flags = bits 0-1: rotate, bit 2: flip vertically
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
- const struct mp_rect *dst, int flags)
+ const struct mp_rect *dst)
{
- struct vertex va[4];
+ struct vertex va[4] = {0};
struct gl_transform t;
gl_transform_ortho(&t, 0, vp_w, 0, vp_h);
@@ -914,30 +1017,21 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
struct vertex *v = &va[n];
v->position.x = x[n / 2];
v->position.y = y[n % 2];
- for (int i = 0; i < TEXUNIT_VIDEO_NUM; i++) {
- struct src_tex *s = &p->pass_tex[i];
- if (s->gl_tex) {
- float tx[2] = {s->src.x0, s->src.x1};
- float ty[2] = {s->src.y0, s->src.y1};
- if (flags & 4)
- MPSWAP(float, ty[0], ty[1]);
- bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
- v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->w);
- v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->h);
- }
+ for (int i = 0; i < p->pass_tex_num; i++) {
+ struct img_tex *s = &p->pass_tex[i];
+ if (!s->gl_tex)
+ continue;
+ struct gl_transform tr = s->transform;
+ gl_transform_trans(s->pre_transform, &tr);
+ float tx = (n / 2) * s->w;
+ float ty = (n % 2) * s->h;
+ gl_transform_vec(tr, &tx, &ty);
+ bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
+ v->texcoord[i].x = tx / (rect ? 1 : s->tex_w);
+ v->texcoord[i].y = ty / (rect ? 1 : s->tex_h);
}
}
- int rot = flags & 3;
- while (rot--) {
- static const int perm[4] = {1, 3, 0, 2};
- struct vertex vb[4];
- memcpy(vb, va, sizeof(vb));
- for (int n = 0; n < 4; n++)
- memcpy(va[n].texcoord, vb[perm[n]].texcoord,
- sizeof(struct vertex_pt[TEXUNIT_VIDEO_NUM]));
- }
-
p->gl->Viewport(0, 0, vp_w, abs(vp_h));
gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4);
@@ -946,32 +1040,37 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
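// Renders the pending pass into fbo, restricted to dst, then clears the bound
// pass textures (the former "flags" parameter no longer exists).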
static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h,
- const struct mp_rect *dst, int flags)
+ const struct mp_rect *dst)
{
GL *gl = p->gl;
pass_prepare_src_tex(p);
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
gl_sc_gen_shader_and_reset(p->sc);
- render_pass_quad(p, vp_w, vp_h, dst, flags);
+ render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
memset(&p->pass_tex, 0, sizeof(p->pass_tex));
+ p->pass_tex_num = 0;
}
// dst_fbo: this will be used for rendering; possibly reallocating the whole
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
-// tex: the texture unit to load the result back into
// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy
// flags allows the FBO to be larger than the w/h parameters)
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
- int w, int h, int tex, int flags)
+ int w, int h, int flags)
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- finish_pass_direct(p, dst_fbo->fbo, dst_fbo->w, dst_fbo->h,
- &(struct mp_rect){0, 0, w, h}, 0);
- pass_load_fbotex(p, dst_fbo, w, h, tex);
+ finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
+ &(struct mp_rect){0, 0, w, h});
+}
+
+static void skip_unused(struct gl_video *p, int num_components)
+{
+ for (int i = num_components; i < 4; i++)
+ GLSLF("color.%c = %f;\n", "rgba"[i], i < 3 ? 0.0 : 1.0);
}
static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
@@ -1008,8 +1107,8 @@ static const char *get_custom_shader_fn(struct gl_video *p, const char *body)
// Applies an arbitrary number of shaders in sequence, using the given pair
// of FBOs as intermediate buffers. Returns whether any shaders were applied.
-static bool apply_shaders(struct gl_video *p, char **shaders,
- struct fbotex textures[2], int tex_num, int w, int h)
+static bool apply_shaders(struct gl_video *p, char **shaders, int w, int h,
+ struct fbotex textures[2])
{
if (!shaders)
return false;
@@ -1019,13 +1118,15 @@ static bool apply_shaders(struct gl_video *p, char **shaders,
const char *body = load_cached_file(p, shaders[n]);
if (!body)
continue;
- finish_pass_fbo(p, &textures[tex], w, h, tex_num, 0);
- GLSLHF("#define pixel_size pixel_size%d\n", tex_num);
+ finish_pass_fbo(p, &textures[tex], w, h, 0);
+ int id = pass_bind(p, img_tex_fbo(&textures[tex], identity_trans,
+ PLANE_RGB, p->components));
+ GLSLHF("#define pixel_size pixel_size%d\n", id);
load_shader(p, body);
const char *fn_name = get_custom_shader_fn(p, body);
GLSLF("// custom shader\n");
GLSLF("color = %s(texture%d, texcoord%d, texture_size%d);\n",
- fn_name, tex_num, tex_num, tex_num);
+ fn_name, id, id, id);
tex = (tex+1) % 2;
success = true;
}
@@ -1165,46 +1266,52 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
}
// Special helper for sampling from two separated stages
-static void pass_sample_separated(struct gl_video *p, int src_tex,
- struct scaler *scaler, int w, int h,
- struct gl_transform transform)
+static void pass_sample_separated(struct gl_video *p, struct img_tex src,
+ struct scaler *scaler, int w, int h)
{
- // Keep the x components untouched for the first pass
- struct mp_rect_f src_new = p->pass_tex[src_tex].src;
- gl_transform_rect(transform, &src_new);
+ // Separate the transformation into x and y components, per pass
+ struct gl_transform t_x = {
+ .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
+ .t = {src.transform.t[0], 0.0},
+ };
+ struct gl_transform t_y = {
+ .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
+ .t = {0.0, src.transform.t[1]},
+ };
+
+ // First pass (scale only in the y dir)
+ src.transform = t_y;
+ sampler_prelude(p->sc, pass_bind(p, src));
GLSLF("// pass 1\n");
- p->pass_tex[src_tex].src.y0 = src_new.y0;
- p->pass_tex[src_tex].src.y1 = src_new.y1;
pass_sample_separated_gen(p->sc, scaler, 0, 1);
- int src_w = p->pass_tex[src_tex].src.x1 - p->pass_tex[src_tex].src.x0;
- finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H);
- // Restore the sample source for the second pass
- sampler_prelude(p->sc, src_tex);
+ GLSLF("color *= %f;\n", src.multiplier);
+ finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H);
+
+ // Second pass (scale only in the x dir)
+ src = img_tex_fbo(&scaler->sep_fbo, t_x, src.type, src.components);
+ sampler_prelude(p->sc, pass_bind(p, src));
GLSLF("// pass 2\n");
- p->pass_tex[src_tex].src.x0 = src_new.x0;
- p->pass_tex[src_tex].src.x1 = src_new.x1;
pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
-// Sample. This samples from the texture ID given by src_tex. It's hardcoded to
-// use all variables and values associated with it (which includes textureN,
-// texcoordN and texture_sizeN).
-// The src rectangle is implicit in p->pass_tex + transform.
+// Sample from img_tex, with the src rectangle given by it.
// The dst rectangle is implicit by what the caller will do next, but w and h
// must still be what is going to be used (to dimension FBOs correctly).
// This will write the scaled contents to the vec4 "color".
// The scaler unit is initialized by this function; in order to avoid cache
// thrashing, the scaler unit should usually use the same parameters.
-static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
- const struct scaler_config *conf, double scale_factor,
- int w, int h, struct gl_transform transform)
+static void pass_sample(struct gl_video *p, struct img_tex tex,
+ struct scaler *scaler, const struct scaler_config *conf,
+ double scale_factor, int w, int h)
{
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
- sampler_prelude(p->sc, src_tex);
- // Set up the transformation for everything other than separated scaling
- if (!scaler->kernel || scaler->kernel->polar)
- gl_transform_rect(transform, &p->pass_tex[src_tex].src);
+ bool is_separated = scaler->kernel && !scaler->kernel->polar;
+
+ // Set up the transformation+prelude and bind the texture, for everything
+ // other than separated scaling (which does this in the subfunction)
+ if (!is_separated)
+ sampler_prelude(p->sc, pass_bind(p, tex));
// Dispatch the scaler. They're all wildly different.
const char *name = scaler->conf.kernel.name;
@@ -1227,28 +1334,42 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
} else if (scaler->kernel && scaler->kernel->polar) {
pass_sample_polar(p->sc, scaler);
} else if (scaler->kernel) {
- pass_sample_separated(p, src_tex, scaler, w, h, transform);
+ pass_sample_separated(p, tex, scaler, w, h);
} else {
// Should never happen
abort();
}
+ // Apply any required multipliers. Separated scaling already does this in
+ // its first stage
+ if (!is_separated)
+ GLSLF("color *= %f;\n", tex.multiplier);
+
// Micro-optimization: Avoid scaling unneeded channels
- if (!p->has_alpha || p->opts.alpha_mode != 1)
- GLSL(color.a = 1.0;)
+ skip_unused(p, tex.components);
}
// Get the number of passes for prescaler, with given display size.
-static int get_prescale_passes(struct gl_video *p)
+static int get_prescale_passes(struct gl_video *p, struct img_tex tex[4])
{
- if (!p->opts.prescale)
+ if (!p->opts.prescale_luma)
return 0;
+
+ // Return 0 if no luma planes exist
+ for (int n = 0; ; n++) {
+ if (n > 4)
+ return 0;
+
+ if (tex[n].type == PLANE_LUMA)
+ break;
+ }
+
// The downscaling threshold check is turned off.
if (p->opts.prescale_downscaling_threshold < 1.0f)
return p->opts.prescale_passes;
double scale_factors[2];
- get_scale_factors(p, scale_factors);
+ get_scale_factors(p, true, scale_factors);
int passes = 0;
for (; passes < p->opts.prescale_passes; passes ++) {
@@ -1265,283 +1386,303 @@ static int get_prescale_passes(struct gl_video *p)
return passes;
}
-// apply pre-scalers
-static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num,
- int planes, int w, int h, int passes,
- float tex_mul, struct gl_transform *offset)
+// Upload the NNEDI3 UBO weights only if needed
+static void upload_nnedi3_weights(struct gl_video *p)
{
- *offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+ GL *gl = p->gl;
- int tex_num = src_tex_num;
+ if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO &&
+ !p->nnedi3_weights_buffer)
+ {
+ gl->GenBuffers(1, &p->nnedi3_weights_buffer);
+ gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer);
- // Happens to be the same for superxbr and nnedi3.
- const int steps_per_pass = 2;
+ int size;
+ const float *weights = get_nnedi3_weights(p->opts.nnedi3_opts, &size);
- for (int pass = 0; pass < passes; pass++) {
- for (int step = 0; step < steps_per_pass; step++) {
- struct gl_transform transform = {{{0}}};
+ MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", size);
- switch(p->opts.prescale) {
- case 1:
- pass_superxbr(p->sc, planes, tex_num, step,
- tex_mul, p->opts.superxbr_opts, &transform);
- break;
- case 2:
- pass_nnedi3(p->gl, p->sc, planes, tex_num, step,
- tex_mul, p->opts.nnedi3_opts, &transform);
- break;
- default:
- abort();
- }
+ // We don't know the endianness of the GPU, so just assume it's little-endian
+ gl->BufferData(GL_UNIFORM_BUFFER, size, weights, GL_STATIC_DRAW);
+ }
+}
- tex_mul = 1.0;
+// Applies a single pass of the prescaler, and accumulates the offset in
+// pass_transform.
+static void pass_prescale_luma(struct gl_video *p, struct img_tex *tex,
+ struct gl_transform *pass_transform,
+ struct fbotex fbo[MAX_PRESCALE_STEPS])
+{
+ // Happens to be the same for superxbr and nnedi3.
+ const int num_steps = 2;
+
+ for (int step = 0; step < num_steps; step++) {
+ struct gl_transform step_transform = {{{0}}};
+ int id = pass_bind(p, *tex);
+ int planes = tex->components;
+
+ switch(p->opts.prescale_luma) {
+ case 1:
+ assert(planes == 1);
+ pass_superxbr(p->sc, id, step, tex->multiplier,
+ p->opts.superxbr_opts, &step_transform);
+ break;
+ case 2:
+ upload_nnedi3_weights(p);
+ pass_nnedi3(p->gl, p->sc, planes, id, step, tex->multiplier,
+ p->opts.nnedi3_opts, &step_transform, tex->gl_target);
+ break;
+ default:
+ abort();
+ }
- gl_transform_trans(transform, offset);
+ int new_w = tex->w * (int)step_transform.m[0][0],
+ new_h = tex->h * (int)step_transform.m[1][1];
- w *= (int)transform.m[0][0];
- h *= (int)transform.m[1][1];
+ skip_unused(p, planes);
+ finish_pass_fbo(p, &fbo[step], new_w, new_h, 0);
+ *tex = img_tex_fbo(&fbo[step], identity_trans, tex->type, tex->components);
- finish_pass_fbo(p, &p->prescale_fbo[pass][step],
- w, h, dst_tex_num, 0);
- tex_num = dst_tex_num;
- }
+ // Accumulate the local transform
+ gl_transform_trans(step_transform, pass_transform);
}
}
-// Prescale the planes from the main textures.
-static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
- struct gl_transform *chromafix,
- struct gl_transform *transform,
- struct src_tex *prescaled_tex,
- int *prescaled_planes)
+// Copy a texture to the vec4 color, while increasing offset. Also applies
+// the texture multiplier to the sampled color
+static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img)
{
- if (p->opts.prescale == 2 &&
- p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO)
- {
- // nnedi3 are configured to use uniform buffer objects.
- if (!p->nnedi3_weights_buffer) {
- p->gl->GenBuffers(1, &p->nnedi3_weights_buffer);
- p->gl->BindBufferBase(GL_UNIFORM_BUFFER, 0,
- p->nnedi3_weights_buffer);
- int weights_size;
- const float *weights =
- get_nnedi3_weights(p->opts.nnedi3_opts, &weights_size);
-
- MP_VERBOSE(p, "Uploading NNEDI3 weights via uniform buffer (size=%d)\n",
- weights_size);
-
- // We don't know the endianness of GPU, just assume it's little
- // endian.
- p->gl->BufferData(GL_UNIFORM_BUFFER, weights_size, weights,
- GL_STATIC_DRAW);
- }
+ int count = img.components;
+ assert(*offset + count <= 4);
+
+ int id = pass_bind(p, img);
+ char src[5] = {0};
+ char dst[5] = {0};
+ const char *tex_fmt = img.texture_la ? "ragg" : "rgba";
+ const char *dst_fmt = "rgba";
+ for (int i = 0; i < count; i++) {
+ src[i] = tex_fmt[i];
+ dst[i] = dst_fmt[*offset + i];
}
- // number of passes to apply prescaler, can be zero.
- int prescale_passes = get_prescale_passes(p);
- if (prescale_passes == 0)
- return false;
+ if (img.use_integer) {
+ uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
+ img.multiplier *= 1.0 / (tex_max - 1);
+ }
- p->use_normalized_range = true;
+ GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
+ dst, img.multiplier, id, id, src);
- // estimate a safe upperbound of planes being prescaled on texture0.
- *prescaled_planes = p->is_yuv ? 1 :
- (!p->color_swizzle[0] || p->color_swizzle[3] == 'a') ? 3 : 4;
+ *offset += count;
+}
- struct src_tex tex_backup[4];
- for (int i = 0; i < 4; i++)
- tex_backup[i] = p->pass_tex[i];
+// sample from video textures, set "color" variable to yuv value
+static void pass_read_video(struct gl_video *p)
+{
+ struct img_tex tex[4];
+ pass_get_img_tex(p, &p->image, tex);
- if (p->opts.deband) {
- // apply debanding before upscaling.
- pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->pass_tex[0].gl_target,
- tex_mul, &p->lfg);
- finish_pass_fbo(p, &p->deband_fbo, p->texture_w,
- p->texture_h, 0, 0);
- tex_backup[0] = p->pass_tex[0];
+ // Most of the steps here don't actually apply image transformations yet,
+ // save for the actual upscaling - so as a code convenience we store them
+ // separately
+ struct gl_transform transforms[4];
+ struct gl_transform tex_trans = identity_trans;
+ for (int i = 0; i < 4; i++) {
+ transforms[i] = tex[i].transform;
+ tex[i].transform = identity_trans;
}
- // process texture0 and store the result in texture4.
- pass_prescale(p, 0, 4, *prescaled_planes, p->texture_w, p->texture_h,
- prescale_passes, p->opts.deband ? 1.0 : tex_mul, transform);
-
- // correct the chromafix under new transform.
- chromafix->t[0] -= transform->t[0] / transform->m[0][0];
- chromafix->t[1] -= transform->t[1] / transform->m[1][1];
+ int prescale_passes = get_prescale_passes(p, tex);
- // restore the first four texture.
- for (int i = 0; i < 4; i++)
- p->pass_tex[i] = tex_backup[i];
+ int dst_w = p->texture_w << prescale_passes,
+ dst_h = p->texture_h << prescale_passes;
- // backup texture4 for later use.
- *prescaled_tex = p->pass_tex[4];
+ bool needs_deband[4];
+ int scaler_id[4]; // ID if needed, -1 otherwise
+ int needs_prescale[4]; // number of prescaling passes left
- return true;
-}
+ // Determine what needs to be done for which plane
+ for (int i=0; i < 4; i++) {
+ enum plane_type type = tex[i].type;
+ if (type == PLANE_NONE) {
+ needs_deband[i] = false;
+ needs_prescale[i] = 0;
+ scaler_id[i] = -1;
+ continue;
+ }
-// The input textures are in an integer format (non-fixed-point), like R16UI.
-// Convert it to float in an extra pass.
-static void pass_integer_conversion(struct gl_video *p, bool *chroma_merging)
-{
- double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
- p->image_desc.component_bits,
- p->image_desc.component_full_bits);
- uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
- tex_mul *= 1.0 / (tex_max - 1);
+ needs_deband[i] = type != PLANE_ALPHA ? p->opts.deband : false;
+ needs_prescale[i] = type == PLANE_LUMA ? prescale_passes : 0;
- struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
- assert(sizeof(pass_tex) == sizeof(p->pass_tex));
- memcpy(pass_tex, p->pass_tex, sizeof(pass_tex));
+ scaler_id[i] = -1;
+ switch (type) {
+ case PLANE_RGB:
+ case PLANE_LUMA:
+ case PLANE_XYZ:
+ scaler_id[i] = SCALER_SCALE;
+ break;
- *chroma_merging = p->plane_count == 3;
+ case PLANE_CHROMA:
+ scaler_id[i] = SCALER_CSCALE;
+ break;
- for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
- if (!p->pass_tex[n].gl_tex)
- continue;
- if (*chroma_merging && n == 2)
+ case PLANE_ALPHA: // always use bilinear for alpha
+ default:
continue;
- GLSLF("// integer conversion plane %d\n", n);
- GLSLF("uvec4 icolor = texture(texture%d, texcoord%d);\n", n, n);
- GLSLF("color = vec4(icolor) * tex_mul;\n");
- if (*chroma_merging && n == 1) {
- GLSLF("uvec4 icolor2 = texture(texture2, texcoord2);\n");
- GLSLF("color.g = vec4(icolor2).r * tex_mul;\n");
}
- gl_sc_uniform_f(p->sc, "tex_mul", tex_mul);
- int c_w = p->pass_tex[n].src.x1 - p->pass_tex[n].src.x0;
- int c_h = p->pass_tex[n].src.y1 - p->pass_tex[n].src.y0;
- finish_pass_fbo(p, &p->integer_conv_fbo[n], c_w, c_h, n, 0);
- pass_tex[n] = p->pass_tex[n];
- memcpy(p->pass_tex, pass_tex, sizeof(p->pass_tex));
+
+ // We can skip scaling if the texture is already at the required size
+ if (tex[i].w == dst_w && tex[i].h == dst_h)
+ scaler_id[i] = -1;
}
- p->use_normalized_range = true;
-}
+ // Process all the planes that need some action perfo