summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- video/img_format.c 1
-rw-r--r-- video/img_format.h 1
-rw-r--r-- video/out/opengl/nnedi3.c 4
-rw-r--r-- video/out/opengl/superxbr.c 2
-rw-r--r-- video/out/opengl/utils.c 30
-rw-r--r-- video/out/opengl/utils.h 20
-rw-r--r-- video/out/opengl/video.c 806
-rw-r--r-- video/out/opengl/video_shaders.c 3
-rw-r--r-- video/out/opengl/video_shaders.h 2
9 files changed, 490 insertions, 379 deletions
diff --git a/video/img_format.c b/video/img_format.c
index 82136b5192..fe2ca14bf4 100644
--- a/video/img_format.c
+++ b/video/img_format.c
@@ -171,6 +171,7 @@ struct mp_imgfmt_desc mp_imgfmt_get_desc(int mpfmt)
shift = d.shift;
if (shift != d.shift)
shift = -1;
+ desc.components[d.plane] += 1;
}
for (int p = 0; p < 4; p++) {
diff --git a/video/img_format.h b/video/img_format.h
index b18a6f5d3f..a58e445ea2 100644
--- a/video/img_format.h
+++ b/video/img_format.h
@@ -93,6 +93,7 @@ struct mp_imgfmt_desc {
int8_t component_bits; // number of bits per component (0 if uneven)
int8_t component_full_bits; // number of bits per component including
// internal padding (0 if uneven)
+ int8_t components[MP_MAX_PLANES]; // number of components for each plane
// chroma shifts per plane (provided for convenience with planar formats)
int8_t xs[MP_MAX_PLANES];
int8_t ys[MP_MAX_PLANES];
diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c
index c07731611a..702a8dd55f 100644
--- a/video/out/opengl/nnedi3.c
+++ b/video/out/opengl/nnedi3.c
@@ -112,8 +112,8 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons];
const uint32_t *weights = (const int*)(nnedi3_weights + offset * 4);
- GLSLF("// nnedi3 (tex %d, step %d, neurons %d, window %dx%d, mode %d)\n",
- tex_num, step + 1, neurons, width, height, conf->upload);
+ GLSLF("// nnedi3 (step %d, neurons %d, window %dx%d, mode %d)\n",
+ step, neurons, width, height, conf->upload);
// This is required since each row will be encoded into vec4s
assert(width % 4 == 0);
diff --git a/video/out/opengl/superxbr.c b/video/out/opengl/superxbr.c
index 8039e6e01d..87319aab99 100644
--- a/video/out/opengl/superxbr.c
+++ b/video/out/opengl/superxbr.c
@@ -76,7 +76,7 @@ void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
struct gl_transform *transform)
{
assert(0 <= step && step < 2);
- GLSLF("// superxbr (tex %d, step %d)\n", tex_num, step + 1);
+ GLSLF("// superxbr (step %d)\n", step);
if (!conf)
conf = &superxbr_opts_def;
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 7329240593..02f1ea6584 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -355,13 +355,18 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
int cw = w, ch = h;
- if ((flags & FBOTEX_FUZZY_W) && cw < fbo->w)
- cw = fbo->w;
- if ((flags & FBOTEX_FUZZY_H) && ch < fbo->h)
- ch = fbo->h;
-
- if (fbo->w == cw && fbo->h == ch && fbo->iformat == iformat)
+ if ((flags & FBOTEX_FUZZY_W) && cw < fbo->rw)
+ cw = fbo->rw;
+ if ((flags & FBOTEX_FUZZY_H) && ch < fbo->rh)
+ ch = fbo->rh;
+
+ if (fbo->rw == cw && fbo->rh == ch && fbo->iformat == iformat) {
+ fbo->lw = w;
+ fbo->lh = h;
return true;
+ }
+
+ int lw = w, lh = h;
if (flags & FBOTEX_FUZZY_W)
w = MP_ALIGN_UP(w, 256);
@@ -384,12 +389,15 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
*fbo = (struct fbotex) {
.gl = gl,
- .w = w,
- .h = h,
+ .rw = w,
+ .rh = h,
+ .lw = lw,
+ .lh = lh,
.iformat = iformat,
};
- mp_verbose(log, "Create FBO: %dx%d\n", fbo->w, fbo->h);
+ mp_verbose(log, "Create FBO: %dx%d -> %dx%d\n", fbo->lw, fbo->lh,
+ fbo->rw, fbo->rh);
if (!(gl->mpgl_caps & MPGL_CAP_FB))
return false;
@@ -397,7 +405,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
gl->GenFramebuffers(1, &fbo->fbo);
gl->GenTextures(1, &fbo->texture);
gl->BindTexture(GL_TEXTURE_2D, fbo->texture);
- gl->TexImage2D(GL_TEXTURE_2D, 0, format.internal_format, fbo->w, fbo->h, 0,
+ gl->TexImage2D(GL_TEXTURE_2D, 0, format.internal_format, fbo->rw, fbo->rh, 0,
format.format, format.type, NULL);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
@@ -977,7 +985,7 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc)
}
ADD(frag, "void main() {\n");
// we require _all_ frag shaders to write to a "vec4 color"
- ADD(frag, "vec4 color;\n");
+ ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
ADD(frag, "%s", sc->text);
if (gl->glsl_version >= 130) {
ADD(frag, "out_color = color;\n");
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 3ec6077bf5..a4a6cac302 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -71,7 +71,8 @@ struct fbotex {
GLuint texture;
GLenum iformat;
GLenum tex_filter;
- int w, h; // size of .texture
+ int rw, rh; // real (texture) size
+ int lw, lh; // logical (configured) size
};
bool fbotex_init(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
@@ -90,6 +91,11 @@ struct gl_transform {
float t[2];
};
+static const struct gl_transform identity_trans = {
+ .m = {{1.0, 0.0}, {0.0, 1.0}},
+ .t = {0.0, 0.0},
+};
+
void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
float y0, float y1);
@@ -112,6 +118,18 @@ static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r)
gl_transform_vec(t, &r->x1, &r->y1);
}
+static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b)
+{
+ for (int x = 0; x < 2; x++) {
+ for (int y = 0; y < 2; y++) {
+ if (a.m[x][y] != b.m[x][y])
+ return false;
+ }
+ }
+
+ return a.t[0] == b.t[0] && a.t[1] == b.t[1];
+}
+
void gl_transform_trans(struct gl_transform t, struct gl_transform *x);
void gl_set_debug_logger(GL *gl, struct mp_log *log);
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index c10e16fe41..e561af762e 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -106,21 +106,36 @@ struct video_image {
struct mp_image *mpi; // original input image
};
-struct fbosurface {
- struct fbotex fbotex;
- double pts;
+enum plane_type {
+ PLANE_NONE = 0,
+ PLANE_RGB,
+ PLANE_LUMA,
+ PLANE_CHROMA,
+ PLANE_ALPHA,
+ PLANE_XYZ,
};
-#define FBOSURFACES_MAX 10
-
-struct src_tex {
+// A self-contained description of a source image which can be bound to a
+// texture unit and sampled from. Contains metadata about how it's to be used
+struct img_tex {
+ enum plane_type type; // must be set to something non-zero
+ int components; // number of relevant coordinates
+ float multiplier; // multiplier to be used when sampling
GLuint gl_tex;
GLenum gl_target;
bool use_integer;
+ int tex_w, tex_h;
int w, h;
- struct mp_rect_f src;
+ struct gl_transform transform;
+};
+
+struct fbosurface {
+ struct fbotex fbotex;
+ double pts;
};
+#define FBOSURFACES_MAX 10
+
struct cached_file {
char *path;
char *body;
@@ -169,15 +184,15 @@ struct gl_video {
bool dumb_mode;
bool forced_dumb_mode;
- struct fbotex chroma_merge_fbo;
- struct fbotex chroma_deband_fbo;
+ struct fbotex merge_fbo[4];
+ struct fbotex deband_fbo[4];
+ struct fbotex scale_fbo[4];
+ struct fbotex integer_fbo[4];
struct fbotex indirect_fbo;
struct fbotex blend_subs_fbo;
struct fbotex unsharp_fbo;
struct fbotex output_fbo;
- struct fbotex deband_fbo;
struct fbosurface surfaces[FBOSURFACES_MAX];
- struct fbotex integer_conv_fbo[TEXUNIT_VIDEO_NUM];
// these are duplicated so we can keep rendering back and forth between
// them to support an unlimited number of shader passes per step
@@ -203,11 +218,11 @@ struct gl_video {
int vp_w, vp_h;
// temporary during rendering
- struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ int pass_tex_num;
int texture_w, texture_h;
struct gl_transform texture_offset; // texture transform without rotation
bool use_linear;
- bool use_normalized_range;
float user_gamma;
int frames_uploaded;
@@ -648,15 +663,16 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteBuffers(1, &p->nnedi3_weights_buffer);
p->nnedi3_weights_buffer = 0;
- fbotex_uninit(&p->chroma_merge_fbo);
- fbotex_uninit(&p->chroma_deband_fbo);
+ for (int n = 0; n < 4; n++) {
+ fbotex_uninit(&p->merge_fbo[n]);
+ fbotex_uninit(&p->deband_fbo[n]);
+ fbotex_uninit(&p->scale_fbo[n]);
+ fbotex_uninit(&p->integer_fbo[n]);
+ }
+
fbotex_uninit(&p->indirect_fbo);
fbotex_uninit(&p->blend_subs_fbo);
fbotex_uninit(&p->unsharp_fbo);
- fbotex_uninit(&p->deband_fbo);
-
- for (int n = 0; n < 4; n++)
- fbotex_uninit(&p->integer_conv_fbo[n]);
for (int n = 0; n < 2; n++) {
fbotex_uninit(&p->pre_fbo[n]);
@@ -713,25 +729,45 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
reinit_rendering(p);
}
-static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo,
- int w, int h, int id)
+// Fill an img_tex struct from an FBO + some metadata
+static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t,
+ enum plane_type type, int components)
{
- p->pass_tex[id] = (struct src_tex){
- .gl_tex = src_fbo->texture,
+ assert(type != PLANE_NONE);
+ return (struct img_tex){
+ .type = type,
+ .gl_tex = fbo->texture,
.gl_target = GL_TEXTURE_2D,
- .w = src_fbo->w,
- .h = src_fbo->h,
- .src = {0, 0, w, h},
+ .multiplier = 1.0,
+ .use_integer = false,
+ .tex_w = fbo->rw,
+ .tex_h = fbo->rh,
+ .w = fbo->lw,
+ .h = fbo->lh,
+ .transform = t,
+ .components = components,
};
}
-static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg,
- struct gl_transform *chroma)
+// Bind an img_tex to a free texture unit and return its ID. At most
+// TEXUNIT_VIDEO_NUM texture units can be bound at once
+static int pass_bind(struct gl_video *p, struct img_tex tex)
{
- *chroma = (struct gl_transform){{{0}}};
+ assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM);
+ p->pass_tex[p->pass_tex_num] = tex;
+ return p->pass_tex_num++;
+}
+// Places a video_image's image textures + associated metadata into tex[]. The
+// number of textures is equal to p->plane_count.
+static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg,
+ struct img_tex tex[4])
+{
assert(vimg->mpi);
+ // Determine the chroma offset
+ struct gl_transform chroma = (struct gl_transform){{{0}}};
+
float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
@@ -743,25 +779,51 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
// so that the luma and chroma sample line up exactly.
// For 4:4:4, setting chroma location should have no effect at all.
// luma sample size (in chroma coord. space)
- chroma->t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
- chroma->t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+ chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
}
// Make sure luma/chroma sizes are aligned.
// Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2
// so luma (3,3) has to align with chroma (2,2).
- chroma->m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w;
- chroma->m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h;
+ chroma.m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w;
+ chroma.m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h;
+
+ // The existing code assumes we just have a single tex multiplier for
+ // all of the planes. This may change in the future
+ float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.colorspace,
+ p->image_desc.component_bits,
+ p->image_desc.component_full_bits);
+ memset(tex, 0, 4 * sizeof(tex[0]));
for (int n = 0; n < p->plane_count; n++) {
struct texplane *t = &vimg->planes[n];
- p->pass_tex[n] = (struct src_tex){
+
+ enum plane_type type;
+ if (n >= 3) {
+ type = PLANE_ALPHA;
+ } else if (p->image_desc.flags & MP_IMGFLAG_RGB) {
+ type = PLANE_RGB;
+ } else if (p->image_desc.flags & MP_IMGFLAG_YUV) {
+ type = n == 0 ? PLANE_LUMA : PLANE_CHROMA;
+ } else if (p->image_desc.flags & MP_IMGFLAG_XYZ) {
+ type = PLANE_XYZ;
+ } else {
+ abort();
+ }
+
+ tex[n] = (struct img_tex){
+ .type = type,
.gl_tex = t->gl_texture,
.gl_target = t->gl_target,
+ .multiplier = tex_mul,
.use_integer = t->use_integer,
+ .tex_w = t->w,
+ .tex_h = t->h,
.w = t->w,
.h = t->h,
- .src = {0, 0, t->w, t->h},
+ .transform = type == PLANE_CHROMA ? chroma : identity_trans,
+ .components = p->image_desc.components[n],
};
}
}
@@ -864,8 +926,8 @@ static void pass_prepare_src_tex(struct gl_video *p)
GL *gl = p->gl;
struct gl_shader_cache *sc = p->sc;
- for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
- struct src_tex *s = &p->pass_tex[n];
+ for (int n = 0; n < p->pass_tex_num; n++) {
+ struct img_tex *s = &p->pass_tex[n];
if (!s->gl_tex)
continue;
@@ -883,8 +945,8 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
float f[2] = {1, 1};
if (s->gl_target != GL_TEXTURE_RECTANGLE) {
- f[0] = s->w;
- f[1] = s->h;
+ f[0] = s->tex_w;
+ f[1] = s->tex_h;
}
gl_sc_uniform_vec2(sc, texture_size, f);
gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
@@ -914,17 +976,19 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
struct vertex *v = &va[n];
v->position.x = x[n / 2];
v->position.y = y[n % 2];
- for (int i = 0; i < TEXUNIT_VIDEO_NUM; i++) {
- struct src_tex *s = &p->pass_tex[i];
- if (s->gl_tex) {
- float tx[2] = {s->src.x0, s->src.x1};
- float ty[2] = {s->src.y0, s->src.y1};
- if (flags & 4)
- MPSWAP(float, ty[0], ty[1]);
- bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
- v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->w);
- v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->h);
- }
+ for (int i = 0; i < p->pass_tex_num; i++) {
+ struct img_tex *s = &p->pass_tex[i];
+ if (!s->gl_tex)
+ continue;
+ struct mp_rect_f src_rect = {0, 0, s->w, s->h};
+ gl_transform_rect(s->transform, &src_rect);
+ float tx[2] = {src_rect.x0, src_rect.x1};
+ float ty[2] = {src_rect.y0, src_rect.y1};
+ if (flags & 4)
+ MPSWAP(float, ty[0], ty[1]);
+ bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
+ v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->tex_w);
+ v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->tex_h);
}
}
@@ -955,23 +1019,22 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
render_pass_quad(p, vp_w, vp_h, dst, flags);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
memset(&p->pass_tex, 0, sizeof(p->pass_tex));
+ p->pass_tex_num = 0;
}
// dst_fbo: this will be used for rendering; possibly reallocating the whole
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
-// tex: the texture unit to load the result back into
// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy
// flags allows the FBO to be larger than the w/h parameters)
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
- int w, int h, int tex, int flags)
+ int w, int h, int flags)
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- finish_pass_direct(p, dst_fbo->fbo, dst_fbo->w, dst_fbo->h,
+ finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
&(struct mp_rect){0, 0, w, h}, 0);
- pass_load_fbotex(p, dst_fbo, w, h, tex);
}
static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
@@ -1008,8 +1071,8 @@ static const char *get_custom_shader_fn(struct gl_video *p, const char *body)
// Applies an arbitrary number of shaders in sequence, using the given pair
// of FBOs as intermediate buffers. Returns whether any shaders were applied.
-static bool apply_shaders(struct gl_video *p, char **shaders,
- struct fbotex textures[2], int tex_num, int w, int h)
+static bool apply_shaders(struct gl_video *p, char **shaders, int w, int h,
+ struct fbotex textures[2])
{
if (!shaders)
return false;
@@ -1019,13 +1082,15 @@ static bool apply_shaders(struct gl_video *p, char **shaders,
const char *body = load_cached_file(p, shaders[n]);
if (!body)
continue;
- finish_pass_fbo(p, &textures[tex], w, h, tex_num, 0);
- GLSLHF("#define pixel_size pixel_size%d\n", tex_num);
+ finish_pass_fbo(p, &textures[tex], w, h, 0);
+ int id = pass_bind(p, img_tex_fbo(&textures[tex], identity_trans,
+ PLANE_RGB, 4));
+ GLSLHF("#define pixel_size pixel_size%d\n", id);
load_shader(p, body);
const char *fn_name = get_custom_shader_fn(p, body);
GLSLF("// custom shader\n");
GLSLF("color = %s(texture%d, texcoord%d, texture_size%d);\n",
- fn_name, tex_num, tex_num, tex_num);
+ fn_name, id, id, id);
tex = (tex+1) % 2;
success = true;
}
@@ -1165,46 +1230,52 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
}
// Special helper for sampling from two separated stages
-static void pass_sample_separated(struct gl_video *p, int src_tex,
- struct scaler *scaler, int w, int h,
- struct gl_transform transform)
+static void pass_sample_separated(struct gl_video *p, struct img_tex src,
+ struct scaler *scaler, int w, int h)
{
- // Keep the x components untouched for the first pass
- struct mp_rect_f src_new = p->pass_tex[src_tex].src;
- gl_transform_rect(transform, &src_new);
+ // Separate the transformation into x and y components, per pass
+ struct gl_transform t_x = {
+ .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
+ .t = {src.transform.t[0], 0.0},
+ };
+ struct gl_transform t_y = {
+ .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
+ .t = {0.0, src.transform.t[1]},
+ };
+
+ // First pass (scale only in the y dir)
+ src.transform = t_y;
+ sampler_prelude(p->sc, pass_bind(p, src));
GLSLF("// pass 1\n");
- p->pass_tex[src_tex].src.y0 = src_new.y0;
- p->pass_tex[src_tex].src.y1 = src_new.y1;
pass_sample_separated_gen(p->sc, scaler, 0, 1);
- int src_w = p->pass_tex[src_tex].src.x1 - p->pass_tex[src_tex].src.x0;
- finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H);
- // Restore the sample source for the second pass
- sampler_prelude(p->sc, src_tex);
+ GLSLF("color *= %f;\n", src.multiplier);
+ finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H);
+
+ // Second pass (scale only in the x dir)
+ src = img_tex_fbo(&scaler->sep_fbo, t_x, src.type, src.components);
+ sampler_prelude(p->sc, pass_bind(p, src));
GLSLF("// pass 2\n");
- p->pass_tex[src_tex].src.x0 = src_new.x0;
- p->pass_tex[src_tex].src.x1 = src_new.x1;
pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
-// Sample. This samples from the texture ID given by src_tex. It's hardcoded to
-// use all variables and values associated with it (which includes textureN,
-// texcoordN and texture_sizeN).
-// The src rectangle is implicit in p->pass_tex + transform.
+// Sample from img_tex, with the src rectangle given by it.
// The dst rectangle is implicit by what the caller will do next, but w and h
// must still be what is going to be used (to dimension FBOs correctly).
// This will write the scaled contents to the vec4 "color".
// The scaler unit is initialized by this function; in order to avoid cache
// thrashing, the scaler unit should usually use the same parameters.
-static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
- const struct scaler_config *conf, double scale_factor,
- int w, int h, struct gl_transform transform)
+static void pass_sample(struct gl_video *p, struct img_tex tex,
+ struct scaler *scaler, const struct scaler_config *conf,
+ double scale_factor, int w, int h)
{
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
- sampler_prelude(p->sc, src_tex);
- // Set up the transformation for everything other than separated scaling
- if (!scaler->kernel || scaler->kernel->polar)
- gl_transform_rect(transform, &p->pass_tex[src_tex].src);
+ bool is_separated = scaler->kernel && !scaler->kernel->polar;
+
+ // Set up the transformation+prelude and bind the texture, for everything
+ // other than separated scaling (which does this in the subfunction)
+ if (!is_separated)
+ sampler_prelude(p->sc, pass_bind(p, tex));
// Dispatch the scaler. They're all wildly different.
const char *name = scaler->conf.kernel.name;
@@ -1227,22 +1298,37 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
} else if (scaler->kernel && scaler->kernel->polar) {
pass_sample_polar(p->sc, scaler);
} else if (scaler->kernel) {
- pass_sample_separated(p, src_tex, scaler, w, h, transform);
+ pass_sample_separated(p, tex, scaler, w, h);
} else {
// Should never happen
abort();
}
+ // Apply any required multipliers. Separated scaling already does this in
+ // its first stage
+ if (!is_separated)
+ GLSLF("color *= %f;\n", tex.multiplier);
+
// Micro-optimization: Avoid scaling unneeded channels
if (!p->has_alpha || p->opts.alpha_mode != 1)
GLSL(color.a = 1.0;)
}
// Get the number of passes for prescaler, with given display size.
-static int get_prescale_passes(struct gl_video *p)
+static int get_prescale_passes(struct gl_video *p, struct img_tex tex[4])
{
if (!p->opts.prescale)
return 0;
+
+    // Return 0 if no luma planes exist; tex[] has exactly 4 entries (0..3)
+    for (int n = 0; ; n++) {
+        if (n >= 4)
+            return 0;
+
+        if (tex[n].type == PLANE_LUMA)
+            break;
+    }
+
// The downscaling threshold check is turned off.
if (p->opts.prescale_downscaling_threshold < 1.0f)
return p->opts.prescale_passes;
@@ -1265,283 +1351,298 @@ static int get_prescale_passes(struct gl_video *p)
return passes;
}
-// apply pre-scalers
-static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num,
- int planes, int w, int h, int passes,
- float tex_mul, struct gl_transform *offset)
+// Upload the NNEDI3 UBO weights only if needed
+static void upload_nnedi3_weights(struct gl_video *p)
{
- *offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+ GL *gl = p->gl;
- int tex_num = src_tex_num;
+ if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO &&
+ !p->nnedi3_weights_buffer)
+ {
+ gl->GenBuffers(1, &p->nnedi3_weights_buffer);
+ gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer);
+
+ int size;
+ const float *weights = get_nnedi3_weights(p->opts.nnedi3_opts, &size);
+
+ MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", size);
+
+ // We don't know the endianness of GPU, just assume it's LE
+ gl->BufferData(GL_UNIFORM_BUFFER, size, weights, GL_STATIC_DRAW);
+ }
+}
+// Applies a single pass of the prescaler, and accumulates the offset in
+// pass_transform.
+static void pass_prescale(struct gl_video *p, struct img_tex *tex,
+ struct gl_transform *pass_transform,
+ struct fbotex fbo[MAX_PRESCALE_STEPS])
+{
// Happens to be the same for superxbr and nnedi3.
- const int steps_per_pass = 2;
+ const int num_steps = 2;
- for (int pass = 0; pass < passes; pass++) {
- for (int step = 0; step < steps_per_pass; step++) {
- struct gl_transform transform = {{{0}}};
+ for (int step = 0; step < num_steps; step++) {
+ struct gl_transform step_transform = {{{0}}};
+ int id = pass_bind(p, *tex);
- switch(p->opts.prescale) {
- case 1:
- pass_superxbr(p->sc, planes, tex_num, step,
- tex_mul, p->opts.superxbr_opts, &transform);
- break;
- case 2:
- pass_nnedi3(p->gl, p->sc, planes, tex_num, step,
- tex_mul, p->opts.nnedi3_opts, &transform);
- break;
- default:
- abort();
- }
+ switch(p->opts.prescale) {
+ case 1:
+ pass_superxbr(p->sc, tex->components, id, step, tex->multiplier,
+ p->opts.superxbr_opts, &step_transform);
+ break;
+ case 2:
+ upload_nnedi3_weights(p);
+ pass_nnedi3(p->gl, p->sc, tex->components, id, step, tex->multiplier,
+ p->opts.nnedi3_opts, &step_transform);
+ break;
+ default:
+ abort();
+ }
- tex_mul = 1.0;
+ int new_w = tex->w * (int)step_transform.m[0][0],
+ new_h = tex->h * (int)step_transform.m[1][1];
- gl_transform_trans(transform, offset);
+ finish_pass_fbo(p, &fbo[step], new_w, new_h, 0);
+ *tex = img_tex_fbo(&fbo[step], identity_trans, tex->type, tex->components);
- w *= (int)transform.m[0][0];
- h *= (int)transform.m[1][1];
+ // Accumulate the local transform
+ gl_transform_trans(step_transform, pass_transform);
+ }
+}
- finish_pass_fbo(p, &p->prescale_fbo[pass][step],
- w, h, dst_tex_num, 0);
- tex_num = dst_tex_num;
- }
+// Copy a texture to the vec4 color, while increasing offset. Also applies
+// the texture multiplier to the sampled color
+static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img)
+{
+ int count = img.components;
+ assert(*offset + count <= 4);
+
+ int id = pass_bind(p, img);
+ const char *src = "wzyx" + (4 - count);
+ const char *dst = (const char*[4]){"wzyx", "wzy", "wz", "w"}[*offset]
+ + (4 - *offset - count);
+
+ if (img.use_integer) {
+ uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
+ img.multiplier *= 1.0 / (tex_max - 1);
}
+
+ GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
+ dst, img.multiplier, id, id, src);
+
+ *offset += count;
}
-// Prescale the planes from the main textures.
-static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
- struct gl_transform *chromafix,
- struct gl_transform *transform,
- struct src_tex *prescaled_tex,
- int *prescaled_planes)
+// sample from video textures, set "color" variable to yuv value
+static void pass_read_video(struct gl_video *p)
{
- if (p->opts.prescale == 2 &&
- p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO)
- {
- // nnedi3 are configured to use uniform buffer objects.
- if (!p->nnedi3_weights_buffer) {
- p->gl->GenBuffers(1, &p->nnedi3_weights_buffer);
- p->gl->BindBufferBase(GL_UNIFORM_BUFFER, 0,
- p->nnedi3_weights_buffer);
- int weights_size;
- const float *weights =
- get_nnedi3_weights(p->opts.nnedi3_opts, &weights_size);
-
- MP_VERBOSE(p, "Uploading NNEDI3 weights via uniform buffer (size=%d)\n",
- weights_size);
-
- // We don't know the endianness of GPU, just assume it's little
- // endian.
- p->gl->BufferData(GL_UNIFORM_BUFFER, weights_size, weights,
- GL_STATIC_DRAW);
- }
+ struct img_tex tex[4];
+ pass_get_img_tex(p, &p->image, tex);
+
+ // Most of the steps here don't actually apply image transformations yet,
+ // save for the actual upscaling - so as a code convenience we store them
+ // separately
+ struct gl_transform transforms[4];
+ struct gl_transform tex_trans = identity_trans;
+ for (int i = 0; i < 4; i++) {
+ transforms[i] = tex[i].transform;
+ tex[i].transform = identity_trans;
}
- // number of passes to apply prescaler, can be zero.
- int prescale_passes = get_prescale_passes(p);
- if (prescale_passes == 0)
- return false;
+ int prescale_passes = get_prescale_passes(p, tex);
- p->use_normalized_range = true;
+ int dst_w = p->texture_w << prescale_passes,
+ dst_h = p->texture_h << prescale_passes;
- // estimate a safe upperbound of planes being prescaled on texture0.
- *prescaled_planes = p->is_yuv ? 1 :
- (!p->color_swizzle[0] || p->color_swizzle[3] == 'a') ? 3 : 4;
+ bool needs_deband[4];
+ int scaler_id[4]; // ID if needed, -1 otherwise
+ int needs_prescale[4]; // number of prescaling passes left
- struct src_tex tex_backup[4];
- for (int i = 0; i < 4; i++)
- tex_backup[i] = p->pass_tex[i];
+ // Determine what needs to be done for which plane
+ for (int i=0; i < 4; i++) {
+ enum plane_type type = tex[i].type;
+ if (type == PLANE_NONE) {
+ needs_deband[i] = false;
+ needs_prescale[i] = 0;
+ scaler_id[i] = -1;
+ continue;
+ }
- if (p->opts.deband) {
- // apply debanding before upscaling.
- pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->pass_tex[0].gl_target,
- tex_mul, &p->lfg);
- finish_pass_fbo(p, &p->deband_fbo, p->texture_w,
- p->texture_h, 0, 0);
- tex_backup[0] = p->pass_tex[0];
- }
+ needs_deband[i] = type != PLANE_ALPHA ? p->opts.deband : false;
+ needs_prescale[i] = type == PLANE_LUMA ? prescale_passes : 0;
- // process texture0 and store the result in texture4.
- pass_prescale(p, 0, 4, *prescaled_planes, p->texture_w, p->texture_h,
- prescale_passes, p->opts.deband ? 1.0 : tex_mul, transform);
+ scaler_id[i] = -1;
+ switch (type) {
+ case PLANE_RGB:
+ case PLANE_LUMA:
+ case PLANE_XYZ:
+ scaler_id[i] = 0; // scale
+ break;
- // correct the chromafix under new transform.
- chromafix->t[0] -= transform->t[0] / transform->m[0][0];
- chromafix->t[1] -= transform->t[1] / transform->m[1][1];
+ case PLANE_CHROMA:
+ scaler_id[i] = 2; // cscale
+ break;
- // restore the first four texture.
- for (int i = 0; i < 4; i++)
- p->pass_tex[i] = tex_backup[i];
+ case PLANE_ALPHA: // always use bilinear for alpha
+ default:
+ continue;
+ }
- // backup texture4 for later use.
- *prescaled_tex = p->pass_tex[4];
+ // We can skip scaling if the texture is already at the required size
+ if (tex[i].w == dst_w && tex[i].h == dst_h)
+ scaler_id[i] = -1;
+ }
- return true;
-}
+ // Process all the planes that need some action performed
+ while (true) {
+ // Find next plane to operate on
+ int n = -1;
+ for (int i = 0; i < 4; i++) {
+ if (tex[i].type != PLANE_NONE &&
+ (scaler_id[i] >= 0 || needs_deband[i] || needs_prescale[i]))
+ {
+ n = i;
+ break;
+ }
+ }
-// The input textures are in an integer format (non-fixed-point), like R16UI.
-// Convert it to float in an extra pass.
-static void pass_integer_conversion(struct gl_video *p, bool *chroma_merging)
-{
- double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
- p->image_desc.component_bits,
- p->image_desc.component_full_bits);
- uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
- tex_mul *= 1.0 / (tex_max - 1);
+ if (n == -1) // no textures left
+ break;
- struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
- assert(sizeof(pass_tex) == sizeof(p->pass_tex));
- memcpy(pass_tex, p->pass_tex, sizeof(pass_tex));
+ // Figure out if it needs to be merged with anything else first
+ int o = -1;
+ for (int i = n+1; i < 4; i++) {
+ if (tex[i].type == tex[n].type
+ && tex[i].w == tex[n].