diff options
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- | video/out/opengl/video.c | 1109 |
1 files changed, 621 insertions, 488 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index c10e16fe41..8807b65005 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -106,21 +106,38 @@ struct video_image { struct mp_image *mpi; // original input image }; -struct fbosurface { - struct fbotex fbotex; - double pts; +enum plane_type { + PLANE_NONE = 0, + PLANE_RGB, + PLANE_LUMA, + PLANE_CHROMA, + PLANE_ALPHA, + PLANE_XYZ, }; -#define FBOSURFACES_MAX 10 - -struct src_tex { +// A self-contained description of a source image which can be bound to a +// texture unit and sampled from. Contains metadata about how it's to be used +struct img_tex { + enum plane_type type; // must be set to something non-zero + int components; // number of relevant coordinates + float multiplier; // multiplier to be used when sampling GLuint gl_tex; GLenum gl_target; bool use_integer; - int w, h; - struct mp_rect_f src; + int tex_w, tex_h; // source texture size + int w, h; // logical size (with pre_transform applied) + struct gl_transform pre_transform; // source texture space + struct gl_transform transform; // rendering transformation + bool texture_la; // it's a GL_LUMINANCE_ALPHA texture (access with .ra not .rg) }; +struct fbosurface { + struct fbotex fbotex; + double pts; +}; + +#define FBOSURFACES_MAX 10 + struct cached_file { char *path; char *body; @@ -132,6 +149,7 @@ struct gl_video { struct mpv_global *global; struct mp_log *log; struct gl_video_opts opts; + struct gl_lcms *cms; bool gl_debug; int texture_16bit_depth; // actual bits available in 16 bit textures @@ -169,15 +187,15 @@ struct gl_video { bool dumb_mode; bool forced_dumb_mode; - struct fbotex chroma_merge_fbo; - struct fbotex chroma_deband_fbo; + struct fbotex merge_fbo[4]; + struct fbotex deband_fbo[4]; + struct fbotex scale_fbo[4]; + struct fbotex integer_fbo[4]; struct fbotex indirect_fbo; struct fbotex blend_subs_fbo; struct fbotex unsharp_fbo; struct fbotex output_fbo; - struct fbotex deband_fbo; struct fbosurface surfaces[FBOSURFACES_MAX]; - struct fbotex integer_conv_fbo[TEXUNIT_VIDEO_NUM]; // these are duplicated so we can keep rendering back and forth between // them to support an unlimited number of shader passes per step @@ -192,8 +210,8 @@ struct gl_video { bool is_interpolated; bool output_fbo_valid; - // state for luma (0), luma-down(1), chroma (2) and temporal (3) scalers - struct scaler scaler[4]; + // state for configured scalers + struct scaler scaler[SCALER_COUNT]; struct mp_csp_equalizer video_eq; @@ -203,11 +221,12 @@ struct gl_video { int vp_w, vp_h; // temporary during rendering - struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; + struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; + int pass_tex_num; int texture_w, texture_h; struct gl_transform texture_offset; // texture transform without rotation + int components; bool use_linear; - bool use_normalized_range; float user_gamma; int frames_uploaded; @@ -418,10 +437,10 @@ const struct m_sub_options gl_video_conf = { OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), OPT_FLAG("pbo", pbo, 0), - SCALER_OPTS("scale", 0), - SCALER_OPTS("dscale", 1), - SCALER_OPTS("cscale", 2), - SCALER_OPTS("tscale", 3), + SCALER_OPTS("scale", SCALER_SCALE), + SCALER_OPTS("dscale", SCALER_DSCALE), + SCALER_OPTS("cscale", SCALER_CSCALE), + SCALER_OPTS("tscale", SCALER_TSCALE), OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10), OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), OPT_FLAG("linear-scaling", linear_scaling, 0), @@ -470,7 +489,7 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("deband", deband, 0), OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), OPT_FLOAT("sharpen", unsharp, 0), - OPT_CHOICE("prescale", prescale, 0, + OPT_CHOICE("prescale-luma", prescale_luma, 0, ({"none", 0}, {"superxbr", 1} #if HAVE_NNEDI @@ -505,6 +524,7 @@ const struct m_sub_options gl_video_conf = { OPT_REPLACED("smoothmotion-threshold", "tscale-param1"), OPT_REPLACED("scale-down", "dscale"), OPT_REPLACED("fancy-downscaling", "correct-downscaling"), + OPT_REPLACED("prescale", "prescale-luma"), {0} }, @@ -518,7 +538,7 @@ static void check_gl_features(struct gl_video *p); static bool init_format(int fmt, struct gl_video *init); static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi); static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src); -static void get_scale_factors(struct gl_video *p, double xy[2]); +static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); #define GLSL(x) gl_sc_add(p->sc, #x "\n"); #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) @@ -639,7 +659,7 @@ static void uninit_rendering(struct gl_video *p) { GL *gl = p->gl; - for (int n = 0; n < 4; n++) + for (int n = 0; n < SCALER_COUNT; n++) uninit_scaler(p, &p->scaler[n]); gl->DeleteTextures(1, &p->dither_texture); @@ -648,15 +668,16 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteBuffers(1, &p->nnedi3_weights_buffer); p->nnedi3_weights_buffer = 0; - fbotex_uninit(&p->chroma_merge_fbo); - fbotex_uninit(&p->chroma_deband_fbo); + for (int n = 0; n < 4; n++) { + fbotex_uninit(&p->merge_fbo[n]); + fbotex_uninit(&p->deband_fbo[n]); + fbotex_uninit(&p->scale_fbo[n]); + fbotex_uninit(&p->integer_fbo[n]); + } + fbotex_uninit(&p->indirect_fbo); fbotex_uninit(&p->blend_subs_fbo); fbotex_uninit(&p->unsharp_fbo); - fbotex_uninit(&p->deband_fbo); - - for (int n = 0; n < 4; n++) - fbotex_uninit(&p->integer_conv_fbo[n]); for (int n = 0; n < 2; n++) { fbotex_uninit(&p->pre_fbo[n]); @@ -674,21 +695,31 @@ static void uninit_rendering(struct gl_video *p) gl_video_reset_surfaces(p); } -void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) +void gl_video_update_profile(struct gl_video *p) +{ + if (p->use_lut_3d) + return; + + p->use_lut_3d = true; + check_gl_features(p); + + reinit_rendering(p); +} + +static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, + enum mp_csp_trc trc) { GL *gl = p->gl; - if (!lut3d) { - if (p->use_lut_3d) { - p->use_lut_3d = false; - reinit_rendering(p); - } - return; - } + if (!p->cms || !p->use_lut_3d) + return false; - if (!(gl->mpgl_caps & MPGL_CAP_3D_TEX) || gl->es) { - MP_ERR(p, "16 bit fixed point 3D textures not available.\n"); - return; + if (!gl_lcms_has_changed(p->cms, prim, trc)) + return true; + + struct lut3d *lut3d = NULL; + if (!gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc) || !lut3d) { + return false; } if (!p->lut_3d_texture) @@ -705,33 +736,76 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); gl->ActiveTexture(GL_TEXTURE0); - p->use_lut_3d = true; - check_gl_features(p); - debug_check_gl(p, "after 3d lut creation"); - reinit_rendering(p); + return true; } -static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo, - int w, int h, int id) +// Fill an img_tex struct from an FBO + some metadata +static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t, + enum plane_type type, int components) { - p->pass_tex[id] = (struct src_tex){ - .gl_tex = src_fbo->texture, + assert(type != PLANE_NONE); + return (struct img_tex){ + .type = type, + .gl_tex = fbo->texture, .gl_target = GL_TEXTURE_2D, - .w = src_fbo->w, - .h = src_fbo->h, - .src = {0, 0, w, h}, + .multiplier = 1.0, + .use_integer = false, + .tex_w = fbo->rw, + .tex_h = fbo->rh, + .w = fbo->lw, + .h = fbo->lh, + .pre_transform = identity_trans, + .transform = t, + .components = components, }; } -static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg, - struct gl_transform *chroma) +// Bind an img_tex to a free texture unit and return its ID. At most +// TEXUNIT_VIDEO_NUM texture units can be bound at once +static int pass_bind(struct gl_video *p, struct img_tex tex) +{ + assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM); + p->pass_tex[p->pass_tex_num] = tex; + return p->pass_tex_num++; +} + +// Rotation by 90° and flipping. +static void get_plane_source_transform(struct gl_video *p, int w, int h, + struct gl_transform *out_tr) { - *chroma = (struct gl_transform){{{0}}}; + struct gl_transform tr = identity_trans; + int a = p->image_params.rotate % 90 ? 0 : p->image_params.rotate / 90; + int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. + int cos90[4] = {1, 0, -1, 0}; + struct gl_transform rot = {{{cos90[a], sin90[a]}, {-sin90[a], cos90[a]}}}; + gl_transform_trans(rot, &tr); + + // basically, recenter to keep the whole image in view + float b[2] = {1, 1}; + gl_transform_vec(rot, &b[0], &b[1]); + tr.t[0] += b[0] < 0 ? w : 0; + tr.t[1] += b[1] < 0 ? h : 0; + if (p->image.image_flipped) { + struct gl_transform flip = {{{1, 0}, {0, -1}}, {0, h}}; + gl_transform_trans(flip, &tr); + } + + *out_tr = tr; +} + +// Places a video_image's image textures + associated metadata into tex[]. The +// number of textures is equal to p->plane_count. +static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, + struct img_tex tex[4]) +{ assert(vimg->mpi); + // Determine the chroma offset + struct gl_transform chroma = (struct gl_transform){{{0}}}; + float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); @@ -743,26 +817,56 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg // so that the luma and chroma sample line up exactly. // For 4:4:4, setting chroma location should have no effect at all. // luma sample size (in chroma coord. space) - chroma->t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - chroma->t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; + chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; } // Make sure luma/chroma sizes are aligned. // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2 // so luma (3,3) has to align with chroma (2,2). - chroma->m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w; - chroma->m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h; + chroma.m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w; + chroma.m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h; + // The existing code assumes we just have a single tex multiplier for + // all of the planes. This may change in the future + float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.colorspace, + p->image_desc.component_bits, + p->image_desc.component_full_bits); + + memset(tex, 0, 4 * sizeof(tex[0])); for (int n = 0; n < p->plane_count; n++) { struct texplane *t = &vimg->planes[n]; - p->pass_tex[n] = (struct src_tex){ + + enum plane_type type; + if (n >= 3) { + type = PLANE_ALPHA; + } else if (p->image_desc.flags & MP_IMGFLAG_RGB) { + type = PLANE_RGB; + } else if (p->image_desc.flags & MP_IMGFLAG_YUV) { + type = n == 0 ? PLANE_LUMA : PLANE_CHROMA; + } else if (p->image_desc.flags & MP_IMGFLAG_XYZ) { + type = PLANE_XYZ; + } else { + abort(); + } + + tex[n] = (struct img_tex){ + .type = type, .gl_tex = t->gl_texture, .gl_target = t->gl_target, + .multiplier = tex_mul, .use_integer = t->use_integer, + .tex_w = t->w, + .tex_h = t->h, .w = t->w, .h = t->h, - .src = {0, 0, t->w, t->h}, + .transform = type == PLANE_CHROMA ? chroma : identity_trans, + .components = p->image_desc.components[n], + .texture_la = t->gl_format == GL_LUMINANCE_ALPHA, }; + get_plane_source_transform(p, t->w, t->h, &tex[n].pre_transform); + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, tex[n].w, tex[n].h); } } @@ -864,8 +968,8 @@ static void pass_prepare_src_tex(struct gl_video *p) GL *gl = p->gl; struct gl_shader_cache *sc = p->sc; - for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { - struct src_tex *s = &p->pass_tex[n]; + for (int n = 0; n < p->pass_tex_num; n++) { + struct img_tex *s = &p->pass_tex[n]; if (!s->gl_tex) continue; @@ -883,8 +987,8 @@ static void pass_prepare_src_tex(struct gl_video *p) } float f[2] = {1, 1}; if (s->gl_target != GL_TEXTURE_RECTANGLE) { - f[0] = s->w; - f[1] = s->h; + f[0] = s->tex_w; + f[1] = s->tex_h; } gl_sc_uniform_vec2(sc, texture_size, f); gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0], @@ -896,11 +1000,10 @@ static void pass_prepare_src_tex(struct gl_video *p) gl->ActiveTexture(GL_TEXTURE0); } -// flags = bits 0-1: rotate, bit 2: flip vertically static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, - const struct mp_rect *dst, int flags) + const struct mp_rect *dst) { - struct vertex va[4]; + struct vertex va[4] = {0}; struct gl_transform t; gl_transform_ortho(&t, 0, vp_w, 0, vp_h); @@ -914,30 +1017,21 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, struct vertex *v = &va[n]; v->position.x = x[n / 2]; v->position.y = y[n % 2]; - for (int i = 0; i < TEXUNIT_VIDEO_NUM; i++) { - struct src_tex *s = &p->pass_tex[i]; - if (s->gl_tex) { - float tx[2] = {s->src.x0, s->src.x1}; - float ty[2] = {s->src.y0, s->src.y1}; - if (flags & 4) - MPSWAP(float, ty[0], ty[1]); - bool rect = s->gl_target == GL_TEXTURE_RECTANGLE; - v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->w); - v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->h); - } + for (int i = 0; i < p->pass_tex_num; i++) { + struct img_tex *s = &p->pass_tex[i]; + if (!s->gl_tex) + continue; + struct gl_transform tr = s->transform; + gl_transform_trans(s->pre_transform, &tr); + float tx = (n / 2) * s->w; + float ty = (n % 2) * s->h; + gl_transform_vec(tr, &tx, &ty); + bool rect = s->gl_target == GL_TEXTURE_RECTANGLE; + v->texcoord[i].x = tx / (rect ? 1 : s->tex_w); + v->texcoord[i].y = ty / (rect ? 1 : s->tex_h); } } - int rot = flags & 3; - while (rot--) { - static const int perm[4] = {1, 3, 0, 2}; - struct vertex vb[4]; - memcpy(vb, va, sizeof(vb)); - for (int n = 0; n < 4; n++) - memcpy(va[n].texcoord, vb[perm[n]].texcoord, - sizeof(struct vertex_pt[TEXUNIT_VIDEO_NUM])); - } - p->gl->Viewport(0, 0, vp_w, abs(vp_h)); gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4); @@ -946,32 +1040,37 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, // flags: see render_pass_quad static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h, - const struct mp_rect *dst, int flags) + const struct mp_rect *dst) { GL *gl = p->gl; pass_prepare_src_tex(p); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); gl_sc_gen_shader_and_reset(p->sc); - render_pass_quad(p, vp_w, vp_h, dst, flags); + render_pass_quad(p, vp_w, vp_h, dst); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); memset(&p->pass_tex, 0, sizeof(p->pass_tex)); + p->pass_tex_num = 0; } // dst_fbo: this will be used for rendering; possibly reallocating the whole // FBO, if the required parameters have changed // w, h: required FBO target dimension, and also defines the target rectangle // used for rasterization -// tex: the texture unit to load the result back into // flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy // flags allows the FBO to be larger than the w/h parameters) static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, - int w, int h, int tex, int flags) + int w, int h, int flags) { fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags); - finish_pass_direct(p, dst_fbo->fbo, dst_fbo->w, dst_fbo->h, - &(struct mp_rect){0, 0, w, h}, 0); - pass_load_fbotex(p, dst_fbo, w, h, tex); + finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh, + &(struct mp_rect){0, 0, w, h}); +} + +static void skip_unused(struct gl_video *p, int num_components) +{ + for (int i = num_components; i < 4; i++) + GLSLF("color.%c = %f;\n", "rgba"[i], i < 3 ? 0.0 : 1.0); } static void uninit_scaler(struct gl_video *p, struct scaler *scaler) @@ -1008,8 +1107,8 @@ static const char *get_custom_shader_fn(struct gl_video *p, const char *body) // Applies an arbitrary number of shaders in sequence, using the given pair // of FBOs as intermediate buffers. Returns whether any shaders were applied. -static bool apply_shaders(struct gl_video *p, char **shaders, - struct fbotex textures[2], int tex_num, int w, int h) +static bool apply_shaders(struct gl_video *p, char **shaders, int w, int h, + struct fbotex textures[2]) { if (!shaders) return false; @@ -1019,13 +1118,15 @@ static bool apply_shaders(struct gl_video *p, char **shaders, const char *body = load_cached_file(p, shaders[n]); if (!body) continue; - finish_pass_fbo(p, &textures[tex], w, h, tex_num, 0); - GLSLHF("#define pixel_size pixel_size%d\n", tex_num); + finish_pass_fbo(p, &textures[tex], w, h, 0); + int id = pass_bind(p, img_tex_fbo(&textures[tex], identity_trans, + PLANE_RGB, p->components)); + GLSLHF("#define pixel_size pixel_size%d\n", id); load_shader(p, body); const char *fn_name = get_custom_shader_fn(p, body); GLSLF("// custom shader\n"); GLSLF("color = %s(texture%d, texcoord%d, texture_size%d);\n", - fn_name, tex_num, tex_num, tex_num); + fn_name, id, id, id); tex = (tex+1) % 2; success = true; } @@ -1165,46 +1266,52 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, } // Special helper for sampling from two separated stages -static void pass_sample_separated(struct gl_video *p, int src_tex, - struct scaler *scaler, int w, int h, - struct gl_transform transform) +static void pass_sample_separated(struct gl_video *p, struct img_tex src, + struct scaler *scaler, int w, int h) { - // Keep the x components untouched for the first pass - struct mp_rect_f src_new = p->pass_tex[src_tex].src; - gl_transform_rect(transform, &src_new); + // Separate the transformation into x and y components, per pass + struct gl_transform t_x = { + .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}}, + .t = {src.transform.t[0], 0.0}, + }; + struct gl_transform t_y = { + .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}}, + .t = {0.0, src.transform.t[1]}, + }; + + // First pass (scale only in the y dir) + src.transform = t_y; + sampler_prelude(p->sc, pass_bind(p, src)); GLSLF("// pass 1\n"); - p->pass_tex[src_tex].src.y0 = src_new.y0; - p->pass_tex[src_tex].src.y1 = src_new.y1; pass_sample_separated_gen(p->sc, scaler, 0, 1); - int src_w = p->pass_tex[src_tex].src.x1 - p->pass_tex[src_tex].src.x0; - finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H); - // Restore the sample source for the second pass - sampler_prelude(p->sc, src_tex); + GLSLF("color *= %f;\n", src.multiplier); + finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); + + // Second pass (scale only in the x dir) + src = img_tex_fbo(&scaler->sep_fbo, t_x, src.type, src.components); + sampler_prelude(p->sc, pass_bind(p, src)); GLSLF("// pass 2\n"); - p->pass_tex[src_tex].src.x0 = src_new.x0; - p->pass_tex[src_tex].src.x1 = src_new.x1; pass_sample_separated_gen(p->sc, scaler, 1, 0); } -// Sample. This samples from the texture ID given by src_tex. It's hardcoded to -// use all variables and values associated with it (which includes textureN, -// texcoordN and texture_sizeN). -// The src rectangle is implicit in p->pass_tex + transform. +// Sample from img_tex, with the src rectangle given by it. // The dst rectangle is implicit by what the caller will do next, but w and h // must still be what is going to be used (to dimension FBOs correctly). // This will write the scaled contents to the vec4 "color". // The scaler unit is initialized by this function; in order to avoid cache // thrashing, the scaler unit should usually use the same parameters. -static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler, - const struct scaler_config *conf, double scale_factor, - int w, int h, struct gl_transform transform) +static void pass_sample(struct gl_video *p, struct img_tex tex, + struct scaler *scaler, const struct scaler_config *conf, + double scale_factor, int w, int h) { reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); - sampler_prelude(p->sc, src_tex); - // Set up the transformation for everything other than separated scaling - if (!scaler->kernel || scaler->kernel->polar) - gl_transform_rect(transform, &p->pass_tex[src_tex].src); + bool is_separated = scaler->kernel && !scaler->kernel->polar; + + // Set up the transformation+prelude and bind the texture, for everything + // other than separated scaling (which does this in the subfunction) + if (!is_separated) + sampler_prelude(p->sc, pass_bind(p, tex)); // Dispatch the scaler. They're all wildly different. const char *name = scaler->conf.kernel.name; @@ -1227,28 +1334,42 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler, } else if (scaler->kernel && scaler->kernel->polar) { pass_sample_polar(p->sc, scaler); } else if (scaler->kernel) { - pass_sample_separated(p, src_tex, scaler, w, h, transform); + pass_sample_separated(p, tex, scaler, w, h); } else { // Should never happen abort(); } + // Apply any required multipliers. Separated scaling already does this in + // its first stage + if (!is_separated) + GLSLF("color *= %f;\n", tex.multiplier); + // Micro-optimization: Avoid scaling unneeded channels - if (!p->has_alpha || p->opts.alpha_mode != 1) - GLSL(color.a = 1.0;) + skip_unused(p, tex.components); } // Get the number of passes for prescaler, with given display size. -static int get_prescale_passes(struct gl_video *p) +static int get_prescale_passes(struct gl_video *p, struct img_tex tex[4]) { - if (!p->opts.prescale) + if (!p->opts.prescale_luma) return 0; + + // Return 0 if no luma planes exist + for (int n = 0; ; n++) { + if (n > 4) + return 0; + + if (tex[n].type == PLANE_LUMA) + break; + } + // The downscaling threshold check is turned off. if (p->opts.prescale_downscaling_threshold < 1.0f) return p->opts.prescale_passes; double scale_factors[2]; - get_scale_factors(p, scale_factors); + get_scale_factors(p, true, scale_factors); int passes = 0; for (; passes < p->opts.prescale_passes; passes ++) { @@ -1265,283 +1386,303 @@ static int get_prescale_passes(struct gl_video *p) return passes; } -// apply pre-scalers -static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num, - int planes, int w, int h, int passes, - float tex_mul, struct gl_transform *offset) +// Upload the NNEDI3 UBO weights only if needed +static void upload_nnedi3_weights(struct gl_video *p) { - *offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}}; + GL *gl = p->gl; - int tex_num = src_tex_num; + if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO && + !p->nnedi3_weights_buffer) + { + gl->GenBuffers(1, &p->nnedi3_weights_buffer); + gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer); - // Happens to be the same for superxbr and nnedi3. - const int steps_per_pass = 2; + int size; + const float *weights = get_nnedi3_weights(p->opts.nnedi3_opts, &size); - for (int pass = 0; pass < passes; pass++) { - for (int step = 0; step < steps_per_pass; step++) { - struct gl_transform transform = {{{0}}}; + MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", size); - switch(p->opts.prescale) { - case 1: - pass_superxbr(p->sc, planes, tex_num, step, - tex_mul, p->opts.superxbr_opts, &transform); - break; - case 2: - pass_nnedi3(p->gl, p->sc, planes, tex_num, step, - tex_mul, p->opts.nnedi3_opts, &transform); - break; - default: - abort(); - } + // We don't know the endianness of GPU, just assume it's LE + gl->BufferData(GL_UNIFORM_BUFFER, size, weights, GL_STATIC_DRAW); + } +} - tex_mul = 1.0; +// Applies a single pass of the prescaler, and accumulates the offset in +// pass_transform. +static void pass_prescale_luma(struct gl_video *p, struct img_tex *tex, + struct gl_transform *pass_transform, + struct fbotex fbo[MAX_PRESCALE_STEPS]) +{ + // Happens to be the same for superxbr and nnedi3. + const int num_steps = 2; + + for (int step = 0; step < num_steps; step++) { + struct gl_transform step_transform = {{{0}}}; + int id = pass_bind(p, *tex); + int planes = tex->components; + + switch(p->opts.prescale_luma) { + case 1: + assert(planes == 1); + pass_superxbr(p->sc, id, step, tex->multiplier, + p->opts.superxbr_opts, &step_transform); + break; + case 2: + upload_nnedi3_weights(p); + pass_nnedi3(p->gl, p->sc, planes, id, step, tex->multiplier, + p->opts.nnedi3_opts, &step_transform, tex->gl_target); + break; + default: + abort(); + } - gl_transform_trans(transform, offset); + int new_w = tex->w * (int)step_transform.m[0][0], + new_h = tex->h * (int)step_transform.m[1][1]; - w *= (int)transform.m[0][0]; - h *= (int)transform.m[1][1]; + skip_unused(p, planes); + finish_pass_fbo(p, &fbo[step], new_w, new_h, 0); + *tex = img_tex_fbo(&fbo[step], identity_trans, tex->type, tex->components); - finish_pass_fbo(p, &p->prescale_fbo[pass][step], - w, h, dst_tex_num, 0); - tex_num = dst_tex_num; - } + // Accumulate the local transform + gl_transform_trans(step_transform, pass_transform); } } -// Prescale the planes from the main textures. -static bool pass_prescale_luma(struct gl_video *p, float tex_mul, - struct gl_transform *chromafix, - struct gl_transform *transform, - struct src_tex *prescaled_tex, - int *prescaled_planes) +// Copy a texture to the vec4 color, while increasing offset. Also applies +// the texture multiplier to the sampled color +static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) { - if (p->opts.prescale == 2 && - p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) - { - // nnedi3 are configured to use uniform buffer objects. - if (!p->nnedi3_weights_buffer) { - p->gl->GenBuffers(1, &p->nnedi3_weights_buffer); - p->gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, - p->nnedi3_weights_buffer); - int weights_size; - const float *weights = - get_nnedi3_weights(p->opts.nnedi3_opts, &weights_size); - - MP_VERBOSE(p, "Uploading NNEDI3 weights via uniform buffer (size=%d)\n", - weights_size); - - // We don't know the endianness of GPU, just assume it's little - // endian. - p->gl->BufferData(GL_UNIFORM_BUFFER, weights_size, weights, - GL_STATIC_DRAW); - } + int count = img.components; + assert(*offset + count <= 4); + + int id = pass_bind(p, img); + char src[5] = {0}; + char dst[5] = {0}; + const char *tex_fmt = img.texture_la ? "ragg" : "rgba"; + const char *dst_fmt = "rgba"; + for (int i = 0; i < count; i++) { + src[i] = tex_fmt[i]; + dst[i] = dst_fmt[*offset + i]; } - // number of passes to apply prescaler, can be zero. - int prescale_passes = get_prescale_passes(p); - if (prescale_passes == 0) - return false; + if (img.use_integer) { + uint64_t tex_max = 1ull << p->image_desc.component_full_bits; + img.multiplier *= 1.0 / (tex_max - 1); + } - p->use_normalized_range = true; + GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", + dst, img.multiplier, id, id, src); - // estimate a safe upperbound of planes being prescaled on texture0. - *prescaled_planes = p->is_yuv ? 1 : - (!p->color_swizzle[0] || p->color_swizzle[3] == 'a') ? 3 : 4; + *offset += count; +} - struct src_tex tex_backup[4]; - for (int i = 0; i < 4; i++) - tex_backup[i] = p->pass_tex[i]; +// sample from video textures, set "color" variable to yuv value +static void pass_read_video(struct gl_video *p) +{ + struct img_tex tex[4]; + pass_get_img_tex(p, &p->image, tex); - if (p->opts.deband) { - // apply debanding before upscaling. - pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->pass_tex[0].gl_target, - tex_mul, &p->lfg); - finish_pass_fbo(p, &p->deband_fbo, p->texture_w, - p->texture_h, 0, 0); - tex_backup[0] = p->pass_tex[0]; + // Most of the steps here don't actually apply image transformations yet, + // save for the actual upscaling - so as a code convenience we store them + // separately + struct gl_transform transforms[4]; + struct gl_transform tex_trans = identity_trans; + for (int i = 0; i < 4; i++) { + transforms[i] = tex[i].transform; + tex[i].transform = identity_trans; } - // process texture0 and store the result in texture4. - pass_prescale(p, 0, 4, *prescaled_planes, p->texture_w, p->texture_h, - prescale_passes, p->opts.deband ? 1.0 : tex_mul, transform); - - // correct the chromafix under new transform. - chromafix->t[0] -= transform->t[0] / transform->m[0][0]; - chromafix->t[1] -= transform->t[1] / transform->m[1][1]; + int prescale_passes = get_prescale_passes(p, tex); - // restore the first four texture. - for (int i = 0; i < 4; i++) - p->pass_tex[i] = tex_backup[i]; + int dst_w = p->texture_w << prescale_passes, + dst_h = p->texture_h << prescale_passes; - // backup texture4 for later use. - *prescaled_tex = p->pass_tex[4]; + bool needs_deband[4]; + int scaler_id[4]; // ID if needed, -1 otherwise + int needs_prescale[4]; // number of prescaling passes left - return true; -} + // Determine what needs to be done for which plane + for (int i=0; i < 4; i++) { + enum plane_type type = tex[i].type; + if (type == PLANE_NONE) { + needs_deband[i] = false; + needs_prescale[i] = 0; + scaler_id[i] = -1; + continue; + } -// The input textures are in an integer format (non-fixed-point), like R16UI. -// Convert it to float in an extra pass. -static void pass_integer_conversion(struct gl_video *p, bool *chroma_merging) -{ - double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace, - p->image_desc.component_bits, - p->image_desc.component_full_bits); - uint64_t tex_max = 1ull << p->image_desc.component_full_bits; - tex_mul *= 1.0 / (tex_max - 1); + needs_deband[i] = type != PLANE_ALPHA ? p->opts.deband : false; + needs_prescale[i] = type == PLANE_LUMA ? prescale_passes : 0; - struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; - assert(sizeof(pass_tex) == sizeof(p->pass_tex)); - memcpy(pass_tex, p->pass_tex, sizeof(pass_tex)); + scaler_id[i] = -1; + switch (type) { + case PLANE_RGB: + case PLANE_LUMA: + case PLANE_XYZ: + scaler_id[i] = SCALER_SCALE; + break; - *chroma_merging = p->plane_count == 3; + case PLANE_CHROMA: + scaler_id[i] = SCALER_CSCALE; + break; - for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { - if (!p->pass_tex[n].gl_tex) - continue; - if (*chroma_merging && n == 2) + case PLANE_ALPHA: // always use bilinear for alpha + default: continue; - GLSLF("// integer conversion plane %d\n", n); - GLSLF("uvec4 icolor = texture(texture%d, texcoord%d);\n", n, n); - GLSLF("color = vec4(icolor) * tex_mul;\n"); - if (*chroma_merging && n == 1) { - GLSLF("uvec4 icolor2 = texture(texture2, texcoord2);\n"); - GLSLF("color.g = vec4(icolor2).r * tex_mul;\n"); } - gl_sc_uniform_f(p->sc, "tex_mul", tex_mul); - int c_w = p->pass_tex[n].src.x1 - p->pass_tex[n].src.x0; - int c_h = p->pass_tex[n].src.y1 - p->pass_tex[n].src.y0; - finish_pass_fbo(p, &p->integer_conv_fbo[n], c_w, c_h, n, 0); - pass_tex[n] = p->pass_tex[n]; - memcpy(p->pass_tex, pass_tex, sizeof(p->pass_tex)); + + // We can skip scaling if the texture is already at the required size + if (tex[i].w == dst_w && tex[i].h == dst_h) + scaler_id[i] = -1; } - p->use_normalized_range = true; -} + // Process all the planes that need some action perfo |