diff options
Diffstat (limited to 'video/out/gl_video.c')
-rw-r--r-- | video/out/gl_video.c | 864 |
1 files changed, 677 insertions, 187 deletions
diff --git a/video/out/gl_video.c b/video/out/gl_video.c index a52bd82020..5f64dcb1d6 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -44,7 +44,7 @@ // Pixel width of 1D lookup textures. #define LOOKUP_TEXTURE_SIZE 256 -// Texture units 0-3 are used by the video, with unit 0 for free use. +// Texture units 0-3 are used by the video, and for free use by the passes // Units 4-5 are used for scaler LUTs. #define TEXUNIT_SCALERS 4 #define TEXUNIT_3DLUT 6 @@ -123,16 +123,15 @@ struct scaler { struct fbosurface { struct fbotex fbotex; int64_t pts; - bool valid; }; -#define FBOSURFACES_MAX 2 +#define FBOSURFACES_MAX 4 struct src_tex { GLuint gl_tex; GLenum gl_target; int tex_w, tex_h; - struct mp_rect src; + struct mp_rect_f src; }; struct gl_video { @@ -171,10 +170,7 @@ struct gl_video { bool has_alpha; char color_swizzle[5]; - float input_gamma, conv_gamma; - float user_gamma; - bool user_gamma_enabled; // shader handles user_gamma - bool sigmoid_enabled; + bool user_gamma_enabled; struct video_image image; @@ -183,20 +179,14 @@ struct gl_video { struct fbosurface surfaces[FBOSURFACES_MAX]; size_t surface_idx; + size_t surface_now; + bool is_interpolated; // state for luma (0) and chroma (1) scalers struct scaler scalers[2]; - // true if scaler is currently upscaling - bool upscaling; - - bool is_interpolated; - struct mp_csp_equalizer video_eq; - // Source and destination color spaces for the CMS matrix - struct mp_csp_primaries csp_src, csp_dest; - struct mp_rect src_rect; // displayed part of the source video struct mp_rect dst_rect; // video rectangle on output window struct mp_osd_res osd_rect; // OSD size/margins @@ -366,7 +356,19 @@ const struct m_sub_options gl_video_conf = { .opts = (const m_option_t[]) { OPT_FLOATRANGE("gamma", gamma, 0, 0.1, 2.0), OPT_FLAG("gamma-auto", gamma_auto, 0), - OPT_FLAG("srgb", srgb, 0), + OPT_CHOICE("target-prim", target_prim, 0, + ({"auto", MP_CSP_PRIM_AUTO}, + {"bt601-525", MP_CSP_PRIM_BT_601_525}, + {"bt601-625", MP_CSP_PRIM_BT_601_625}, + {"bt709", MP_CSP_PRIM_BT_709}, + {"bt2020", MP_CSP_PRIM_BT_2020}, + {"bt470m", MP_CSP_PRIM_BT_470M})), + OPT_CHOICE("target-trc", target_trc, 0, + ({"auto", MP_CSP_TRC_AUTO}, + {"bt1886", MP_CSP_TRC_BT_1886}, + {"srgb", MP_CSP_TRC_SRGB}, + {"linear", MP_CSP_TRC_LINEAR}, + {"gamma22", MP_CSP_TRC_GAMMA22})), OPT_FLAG("npot", npot, 0), OPT_FLAG("pbo", pbo, 0), OPT_STRING_VALIDATE("scale", scalers[0], 0, validate_scaler_opt), @@ -433,6 +435,7 @@ const struct m_sub_options gl_video_conf = { OPT_REPLACED("cparam2", "cscale-param2"), OPT_REPLACED("cradius", "cscale-radius"), OPT_REPLACED("cantiring", "cscale-antiring"), + OPT_REPLACED("srgb", "target-prim=srgb:target-trc=srgb"), {0} }, @@ -479,6 +482,19 @@ void gl_video_set_debug(struct gl_video *p, bool enable) gl_set_debug_logger(gl, enable ? p->log : NULL); } +static void gl_video_reset_surfaces(struct gl_video *p) +{ + for (int i = 0; i < FBOSURFACES_MAX; i++) + p->surfaces[i].pts = 0; + p->surface_idx = 0; + p->surface_now = 0; +} + +static size_t fbosurface_next(size_t id) +{ + return (id+1) % FBOSURFACES_MAX; +} + static void recreate_osd(struct gl_video *p) { if (p->osd) @@ -507,6 +523,8 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; + + gl_video_reset_surfaces(p); } void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) @@ -546,13 +564,28 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) reinit_rendering(p); } -static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg) +static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo, int id, + int w, int h) +{ + p->pass_tex[id] = (struct src_tex){ + .gl_tex = src_fbo->texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = src_fbo->tex_w, + .tex_h = src_fbo->tex_h, + .src = {0, 0, w, h}, + }; +} + +static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg, + float chroma[3][2]) { GLuint imgtex[4] = {0}; assert(vimg->mpi); - float offset[2] = {0}; + float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); + float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); + int chroma_loc = p->opts.chroma_location; if (!chroma_loc) chroma_loc = p->image_params.chroma_location; @@ -564,13 +597,21 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg // so that the luma and chroma sample line up exactly. // For 4:4:4, setting chroma location should have no effect at all. // luma sample size (in chroma coord. space) - float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); - float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); - // move chroma center to luma center (in chroma coord. space) - offset[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - offset[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + chroma[2][0] = ls_w < 1 ? ls_w * -cx / 2 : 0; + chroma[2][1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + } else { + chroma[2][0] = chroma[2][1] = 0.0; } + // Make sure luma/chroma sizes are aligned. + // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2 + // so luma (3,3) has to align with chroma (2,2). + chroma[0][0] = ls_w * (float)vimg->planes[0].tex_w + / vimg->planes[1].tex_w; + chroma[1][1] = ls_h * (float)vimg->planes[0].tex_h + / vimg->planes[1].tex_h; + chroma[0][1] = chroma[1][0] = 0.0; // No rotation etc. + if (p->hwdec_active) { p->hwdec->driver->map_image(p->hwdec, vimg->mpi, imgtex); } else { @@ -585,17 +626,7 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg .gl_target = t->gl_target, .tex_w = t->tex_w, .tex_h = t->tex_h, - //.src = {0, 0, t->w, t->h}, - .src = { - // xxx this is wrong; we want to crop the source when sampling - // from indirect_fbo, but not when rendering to indirect_fbo - // also, this should apply offset, and take care of odd video - // dimensions properly; and it should use floats instead - .x0 = p->src_rect.x0 >> p->image_desc.xs[n], - .y0 = p->src_rect.y0 >> p->image_desc.ys[n], - .x1 = p->src_rect.x1 >> p->image_desc.xs[n], - .y1 = p->src_rect.y1 >> p->image_desc.ys[n], - }, + .src = {0, 0, t->w, t->h}, }; } } @@ -712,7 +743,7 @@ static void pass_prepare_src_tex(struct gl_video *p) GL *gl = p->gl; struct gl_shader_cache *sc = p->sc; - for (int n = 0; n < p->plane_count; n++) { + for (int n = 0; n < 4; n++) { struct src_tex *s = &p->pass_tex[n]; if (!s->gl_tex) continue; @@ -722,9 +753,9 @@ static void pass_prepare_src_tex(struct gl_video *p) snprintf(texture_name, sizeof(texture_name), "texture%d", n); snprintf(texture_size, sizeof(texture_size), "texture_size%d", n); - gl_sc_uniform_sampler(sc, texture_name, p->gl_target, n); + gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n); float f[2] = {1, 1}; - if (p->gl_target != GL_TEXTURE_RECTANGLE) { + if (s->gl_target != GL_TEXTURE_RECTANGLE) { f[0] = s->tex_w; f[1] = s->tex_h; } @@ -736,12 +767,13 @@ static void pass_prepare_src_tex(struct gl_video *p) gl->ActiveTexture(GL_TEXTURE0); } +// flags = bits 0-1: rotate, bit 2: flip vertically static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, - const struct mp_rect *dst) + const struct mp_rect *dst, int flags) { struct vertex va[4]; - float matrix[3][3]; + float matrix[3][2]; gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h); float x[2] = {dst->x0, dst->x1}; @@ -758,6 +790,8 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, if (s->gl_tex) { float tx[2] = {s->src.x0, s->src.x1}; float ty[2] = {s->src.y0, s->src.y1}; + if (flags & 4) + MPSWAP(float, ty[0], ty[1]); bool rect = s->gl_target == GL_TEXTURE_RECTANGLE; v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->tex_w); v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->tex_h); @@ -765,20 +799,31 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, } } + int rot = flags & 3; + while (rot--) { + static const int perm[4] = {1, 3, 0, 2}; + struct vertex vb[4]; + memcpy(vb, va, sizeof(vb)); + for (int n = 0; n < 4; n++) + memcpy(va[n].texcoord, vb[perm[n]].texcoord, + sizeof(struct vertex_pt[4])); + } + gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4); debug_check_gl(p, "after rendering"); } +// flags: see render_pass_quad static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h, - const struct mp_rect *dst) + const struct mp_rect *dst, int flags) { GL *gl = p->gl; pass_prepare_src_tex(p); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); gl->Viewport(0, 0, vp_w, vp_h < 0 ? -vp_h : vp_h); gl_sc_gen_shader_and_reset(p->sc); - render_pass_quad(p, vp_w, vp_h, dst); + render_pass_quad(p, vp_w, vp_h, dst, flags); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); memset(&p->pass_tex, 0, sizeof(p->pass_tex)); } @@ -787,22 +832,17 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h // FBO, if the required parameters have changed // w, h: required FBO target dimension, and also defines the target rectangle // used for rasterization +// tex: the texture ID to load the result back into // flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy // flags allows the FBO to be larger than the target) static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, - int w, int h, int flags) + int w, int h, int tex, int flags) { fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags); finish_pass_direct(p, dst_fbo->fbo, dst_fbo->tex_w, dst_fbo->tex_h, - &(struct mp_rect){0, 0, w, h}); - p->pass_tex[0] = (struct src_tex){ - .gl_tex = dst_fbo->texture, - .gl_target = GL_TEXTURE_2D, - .tex_w = dst_fbo->tex_w, - .tex_h = dst_fbo->tex_h, - .src = {0, 0, w, h}, - }; + &(struct mp_rect){0, 0, w, h}, 0); + pass_load_fbotex(p, dst_fbo, tex, w, h); } static void uninit_scaler(struct gl_video *p, int scaler_unit) @@ -834,6 +874,9 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name, scaler->insufficient = false; scaler->initialized = true; + for (int n = 0; n < 2; n++) + scaler->params[n] = p->opts.scaler_params[scaler->index][n]; + const struct filter_kernel *t_kernel = mp_find_filter_kernel(scaler->name); if (!t_kernel) return; @@ -842,8 +885,8 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name, scaler->kernel = &scaler->kernel_storage; for (int n = 0; n < 2; n++) { - if (!isnan(p->opts.scaler_params[scaler->index][n])) - scaler->kernel->params[n] = p->opts.scaler_params[scaler->index][n]; + if (!isnan(scaler->params[n])) + scaler->kernel->params[n] = scaler->params[n]; } scaler->antiring = p->opts.scaler_antiring[scaler->index]; @@ -920,14 +963,15 @@ static void pass_sample_separated_get_weights(struct gl_video *p, GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));) GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);) } else { - GLSL(float weights[N];) - GLSL(for (int n = 0; n < N / 4; n++) {) - GLSL( vec4 c = texture(lut, vec2(1.0 / (N / 2) + n / float(N / 4), fcoord));) - GLSL( weights[n * 4 + 0] = c.r;) - GLSL( weights[n * 4 + 1] = c.g;) - GLSL( weights[n * 4 + 2] = c.b;) - GLSL( weights[n * 4 + 3] = c.a;) - GLSL(}) + GLSLF("float weights[%d];\n", N); + for (int n = 0; n < N / 4; n++) { + GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n", + N / 2, n, N / 4); + GLSLF("weights[%d] = c.r;\n", n * 4 + 0); + GLSLF("weights[%d] = c.g;\n", n * 4 + 1); + GLSLF("weights[%d] = c.b;\n", n * 4 + 2); + GLSLF("weights[%d] = c.a;\n", n * 4 + 3); + } } } @@ -937,117 +981,294 @@ static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler, int d_x, int d_y) { int N = scaler->kernel->size; + bool use_ar = scaler->antiring > 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y); - GLSLF("#define N %d\n", N); - GLSLF("#define ANTIRING %f\n", scaler->antiring); - GLSL(vec2 pt = (vec2(1.0) / texture_size0) * dir;) - GLSL(float fcoord = dot(fract(texcoord0 * texture_size0 - vec2(0.5)), dir);) - GLSL(vec2 base = texcoord0 - fcoord * pt - pt * vec2(N / 2 - 1);) + GLSL(vec2 pt = (vec2(1.0) / sample_size) * dir;) + GLSL(float fcoord = dot(fract(sample_pos * sample_size - vec2(0.5)), dir);) + GLSLF("vec2 base = sample_pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1); + GLSL(vec4 c;) + if (use_ar) { + GLSL(vec4 hi = vec4(0.0);) + GLSL(vec4 lo = vec4(1.0);) + } pass_sample_separated_get_weights(p, scaler); - GLSL(vec4 color = vec4(0);) - GLSL(vec4 hi = vec4(0);) - GLSL(vec4 lo = vec4(1);) - GLSL(for (int n = 0; n < N; n++) {) - GLSL( vec4 c = texture(texture0, base + pt * vec2(n));) - GLSL( color += vec4(weights[n]) * c;) - GLSL( if (n == N/2-1 || n == N/2) {) - GLSL( lo = min(lo, c);) - GLSL( hi = max(hi, c);) - GLSL( }) - GLSL(}) - GLSL(color = mix(color, clamp(color, lo, hi), ANTIRING);) -} - -static void pass_sample_separated(struct gl_video *p, struct scaler *scaler, - int w, int h) + GLSLF("// scaler samples\n"); + for (int n = 0; n < N; n++) { + GLSLF("c = texture(texture0, base + pt * vec2(%d));\n", n); + GLSLF("color += vec4(weights[%d]) * c;\n", n); + if (use_ar && (n == N/2-1 || n == N/2)) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + } + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring); + GLSLF("}\n"); +} + +static void pass_sample_separated(struct gl_video *p, int src_tex, + struct scaler *scaler, int w, int h, + float transform[3][2]) { + // Keep the x components untouched for the first pass + struct mp_rect_f src_new = p->pass_tex[0].src; + gl_matrix_mul_rect(transform, &src_new); GLSLF("// pass 1\n"); + p->pass_tex[0].src.y0 = src_new.y0; + p->pass_tex[0].src.y1 = src_new.y1; pass_sample_separated_gen(p, scaler, 0, 1); int src_w = p->pass_tex[0].src.x1 - p->pass_tex[0].src.x0; - finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, 0); + finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H); + // Restore the sample source for the second pass + GLSLF("#define sample_tex texture%d\n", src_tex); + GLSLF("#define sample_pos texcoord%d\n", src_tex); + GLSLF("#define sample_size texture_size%d\n", src_tex); GLSLF("// pass 2\n"); + p->pass_tex[0].src.x0 = src_new.x0; + p->pass_tex[0].src.x1 = src_new.x1; pass_sample_separated_gen(p, scaler, 1, 0); } -// Scale. This uses the p->pass_tex[0] texture as source. It's hardcoded to -// use all variables and values associated with p->pass_tex[0] (which includes -// texture0/texcoord0/texture_size0). -// The src rectangle is implicit in p->pass_tex. +static void pass_sample_polar(struct gl_video *p, struct scaler *scaler) +{ + double radius = scaler->kernel->radius; + int bound = (int)ceil(radius); + bool use_ar = scaler->antiring > 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); + GLSL(vec2 pt = vec2(1.0) / sample_size;) + GLSL(vec2 fcoord = fract(sample_pos * sample_size - vec2(0.5));) + GLSL(vec2 base = sample_pos - fcoord * pt;) + GLSL(vec4 c;) + GLSLF("float w, d, wsum = 0.0;\n"); + if (use_ar) { + GLSL(vec4 lo = vec4(1.0);) + GLSL(vec4 hi = vec4(0.0);) + } + gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target, + TEXUNIT_SCALERS + scaler->index); + GLSLF("// scaler samples\n"); + for (int y = 1-bound; y <= bound; y++) { + for (int x = 1-bound; x <= bound; x++) { + // Since we can't know the subpixel position in advance, assume a + // worst case scenario + int yy = y > 0 ? y-1 : y; + int xx = x > 0 ? x-1 : x; + double dmax = sqrt(xx*xx + yy*yy); + // Skip samples definitely outside the radius + if (dmax >= radius) + continue; + GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius); + // Check for samples that might be skippable + if (dmax >= radius - 1) + GLSLF("if (d < 1.0) {\n"); + GLSL(w = texture1D(lut, d).r;) + GLSL(wsum += w;) + GLSLF("c = texture(sample_tex, base + pt * vec2(%d, %d));\n", x, y); + GLSL(color += vec4(w) * c;) + if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + if (dmax >= radius -1) + GLSLF("}\n"); + } + } + GLSL(color = color / vec4(wsum);) + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring); + GLSLF("}\n"); +} + +static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s) +{ + // Explanation of how bicubic scaling with only 4 texel fetches is done: + // http://www.mate.tue.nl/mate/pdfs/10318.pdf + // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' + // Explanation why this algorithm normally always blurs, even with unit + // scaling: + // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf + // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' + GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" + " + vec4(1, 0, -0.5, 0.5);\n", t, s); + GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); + GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); + GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); + GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s); +} + +static void pass_sample_bicubic_fast(struct gl_video *p) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 fcoord = fract(sample_tex * sample_size + vec2(0.5, 0.5));) + bicubic_calcweights(p, "parmx", "fcoord.x"); + bicubic_calcweights(p, "parmy", "fcoord.y"); + GLSL(vec4 cdelta;) + GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) + GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);) + // first y-interpolation + GLSL(vec4 ar = texture(sample_tex, sample_pos + cdelta.xy);) + GLSL(vec4 ag = texture(sample_tex, sample_pos + cdelta.xw);) + GLSL(vec4 ab = mix(ag, ar, parmy.b);) + // second y-interpolation + GLSL(vec4 br = texture(sample_tex, sample_pos + cdelta.zy);) + GLSL(vec4 bg = texture(sample_tex, sample_pos + cdelta.zw);) + GLSL(vec4 aa = mix(bg, br, parmy.b);) + // x-interpolation + GLSL(color = mix(aa, ab, parmx.b);) + GLSLF("}\n"); +} + +static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 st = pt * 0.5;) + GLSL(vec4 p = texture(sample_tex, sample_pos);) + GLSL(vec4 sum = texture(sample_tex, sample_pos + st * vec2(+1, +1)) + + texture(sample_tex, sample_pos + st * vec2(+1, -1)) + + texture(sample_tex, sample_pos + st * vec2(-1, +1)) + + texture(sample_tex, sample_pos + st * vec2(-1, -1));) + double param = isnan(scaler->params[0]) ? 0.5 : scaler->params[0]; + GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param); + GLSLF("}\n"); +} + +static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 st1 = pt * 1.2;) + GLSL(vec4 p = texture(sample_tex, sample_pos);) + GLSL(vec4 sum1 = texture(sample_tex, sample_pos + st1 * vec2(+1, +1)) + + texture(sample_tex, sample_pos + st1 * vec2(+1, -1)) + + texture(sample_tex, sample_pos + st1 * vec2(-1, +1)) + + texture(sample_tex, sample_pos + st1 * vec2(-1, -1));) + GLSL(vec2 st2 = pt * 1.5;) + GLSL(vec4 sum2 = texture(sample_tex, sample_pos + st2 * vec2(+1, 0)) + + texture(sample_tex, sample_pos + st2 * vec2( 0, +1)) + + texture(sample_tex, sample_pos + st2 * vec2(-1, 0)) + + texture(sample_tex, sample_pos + st2 * vec2( 0, -1));) + GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) + double param = isnan(scaler->params[0]) ? 0.5 : scaler->params[0]; + GLSLF("color = p + t * %f;\n", param); + GLSLF("}\n"); + +} + +// Sample. This samples from the texture ID given by src_tex. It's hardcoded to +// use all variables and values associated with it (which includes textureN, +// texcoordN and texture_sizeN). +// The src rectangle is implicit in p->pass_tex + transform. // The dst rectangle is implicit by what the caller will do next, but w and h // must still be what is going to be used (to dimension FBOs correctly). // This will declare "vec4 color;", which contains the scaled contents. // The scaler unit is initialized by this function; in order to avoid cache // thrashing, the scaler unit should usually use the same parameters. -static void pass_scale(struct gl_video *p, int scaler_unit, const char *name, - double scale_factor, int w, int h) +static void pass_sample(struct gl_video *p, int src_tex, + int scaler_unit, const char *name, double scale_factor, + int w, int h, float transform[3][2]) { struct scaler *scaler = &p->scalers[scaler_unit]; reinit_scaler(p, scaler_unit, name, scale_factor); + // Set up the sample parameters appropriately + GLSLF("#define sample_tex texture%d\n", src_tex); + GLSLF("#define sample_pos texcoord%d\n", src_tex); + GLSLF("#define sample_size texture_size%d\n", src_tex); + + // Set up the transformation for everything other than separated scaling + if (!scaler->kernel || scaler->kernel->polar) + gl_matrix_mul_rect(transform, &p->pass_tex[src_tex].src); + // Dispatch the scaler. They're all wildly different. if (strcmp(scaler->name, "bilinear") == 0) { - GLSL(vec4 color = texture(texture0, texcoord0);) - } else if (scaler->kernel && !scaler->kernel->polar) { - pass_sample_separated(p, scaler, w, h); + GLSL(vec4 color = texture(sample_tex, sample_pos);) + } else if (strcmp(scaler->name, "bicubic_fast") == 0) { + pass_sample_bicubic_fast(p); + } else if (strcmp(scaler->name, "sharpen3") == 0) { + pass_sample_sharpen3(p, scaler); + } else if (strcmp(scaler->name, "sharpen5") == 0) { + pass_sample_sharpen5(p, scaler); + } else if (scaler->kernel && scaler->kernel->polar) { + pass_sample_polar(p, scaler); + } else if (scaler->kernel) { + pass_sample_separated(p, src_tex, scaler, w, h, transform); } else { - abort(); //not implemented yet + // Should never happen + abort(); } + + // Micro-optimization: Avoid scaling unneeded channels + if (!p->has_alpha || p->opts.alpha_mode != 1) + GLSL(color.a = 1.0;) } // sample from video textures, set "color" variable to yuv value -// (not sure how exactly this should involve the resamplers) -static void pass_read_video(struct gl_video *p, bool *use_indirect) +static void pass_read_video(struct gl_video *p) { - pass_set_image_textures(p, &p->image); + float chromafix[3][2]; + pass_set_image_textures(p, &p->image, chromafix); + + if (p->plane_count == 1) { + GLSL(vec4 color = texture(texture0, texcoord0);) + goto fixalpha; + } - if (p->plane_count > 1) { + const char *cscale = p->opts.scalers[1]; + if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED && + strcmp(cscale, "bilinear") != 0) { + struct src_tex luma = p->pass_tex[0]; + if (p->plane_count > 2) { + // For simplicity and performance, we merge the chroma planes + // into a single texture before scaling, so the scaler doesn't + // need to run multiple times. + GLSLF("// chroma merging\n"); + GLSL(vec4 color = vec4(texture(texture1, texcoord0).r, + texture(texture2, texcoord2).r, + 0.0, 1.0);) + int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0; + int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0; + assert(c_w == p->pass_tex[2].src.x1 - p->pass_tex[2].src.x0); + assert(c_h == p->pass_tex[2].src.y1 - p->pass_tex[2].src.y0); + finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 1, 0); + } + GLSLF("// chroma scaling\n"); + pass_sample(p, 1, 1, cscale, 1.0, p->image_w, p->image_h, chromafix); + GLSL(vec2 chroma = color.rg;) + // Always force rendering to a FBO before main scaling, or we would + // scale chroma incorrectly. + p->use_indirect = true; + p->pass_tex[0] = luma; // Restore luma after scaling + } else { + GLSL(vec4 color;) if (p->plane_count == 2) { - GLSL(vec2 chroma = texture(texture1, texcoord1).RG;) // NV formats + gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src); + GLSL(vec2 chroma = texture(texture1, texcoord0).rg;) // NV formats } else { + gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src); + gl_matrix_mul_rect(chromafix, &p->pass_tex[2].src); GLSL(vec2 chroma = vec2(texture(texture1, texcoord1).r, texture(texture2, texcoord2).r);) } + } - const char *cscale = p->opts.scalers[1]; - if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED && - strcmp(cscale, "bilinear") != 0) { - GLSLF("// chroma merging\n"); - GLSL(vec4 color = vec4(chroma.r, chroma.g, 0.0, 0.0);) - if (1) { //p->plane_count > 2) { - // For simplicity - and maybe also for performance - we merge - // the chroma planes into one texture before scaling. So the - // scaler doesn't need to deal with more than 1 source texture. - int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0; - int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0; - finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 0); - } - GLSLF("// chroma scaling\n"); - pass_scale(p, 1, cscale, 1.0, p->image_w, p->image_h); - GLSL(vec2 chroma = color.rg;) - // Always force rendering to a FBO before main scaling, or we would - // scale chroma incorrectly. - *use_indirect = true; - - // What we'd really like to do is putting the output of the chroma - // scaler on texture unit 1, and leave luma on unit 0 (alpha on 3). - // But this obviously doesn't work, so here's an extremely shitty - // hack. Keep in mind that the shader already uses tex unit 0, so - // it can't be changed. alpha is missing too. - struct src_tex prev = p->pass_tex[0]; - pass_set_image_textures(p, &p->image); - p->pass_tex[1] = p->pass_tex[0]; - p->pass_tex[0] = prev; - GLSL(color = vec4(texture(texture1, texcoord1).r, chroma, 0);) - } else { - GLSL(vec4 color = vec4(0.0, chroma, 0.0);) - // These always use bilinear; either because the scaler is bilinear, - // or because we use an indirect pass. - GLSL(color.r = texture(texture0, texcoord0).r;) - if (p->has_alpha && p->plane_count >= 4) - GLSL(color.a = texture(texture3, texcoord3).r;) - } - } else { - GLSL(vec4 color = texture(texture0, texcoord0);) + GLSL(color = vec4(texture(texture0, texcoord0).r, chroma, 1.0);) + +fixalpha: + if (p->has_alpha) { + if (p->plane_count >= 4) + GLSL(color.a = texture(texture3, texcoord3).r;) + if (p->opts.alpha_mode == 0) // none + GLSL(color.a = 1.0;) + if (p->opts.alpha_mode == 2) // blend + GLSL(color = vec4(color.rgb * color.a, 1.0);) } } @@ -1056,33 +1277,38 @@ static void pass_convert_yuv(struct gl_video *p) { struct gl_shader_cache *sc = p->sc; + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + cparams.gray = p->is_yuv && !p->is_packed_yuv && p->plane_count == 1; + cparams.input_bits = p->image_desc.component_bits; + cparams.texture_bits = (cparams.input_bits + 7) & ~7; + mp_csp_set_image_params(&cparams, &p->image_params); + mp_csp_copy_equalizer_values(&cparams, &p->video_eq); + + float user_gamma = cparams.gamma * p->opts.gamma; + p->user_gamma_enabled |= user_gamma != 1.0; + GLSLF("// color conversion\n"); if (p->color_swizzle[0]) GLSLF("color = color.%s;\n", p->color_swizzle); - // Conversion from Y'CbCr or other spaces to RGB - if (!p->is_rgb) { - struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; - cparams.gray = p->is_yuv && !p->is_packed_yuv && p->plane_count == 1; - cparams.input_bits = p->image_desc.component_bits; - cparams.texture_bits = (cparams.input_bits + 7) & ~7; - mp_csp_set_image_params(&cparams, &p->image_params); - mp_csp_copy_equalizer_values(&cparams, &p->video_eq); - if (p->image_desc.flags & MP_IMGFLAG_XYZ) { - cparams.colorspace = MP_CSP_XYZ; - cparams.input_bits = 8; - cparams.texture_bits = 8; - } + // Pre-colormatrix input gamma correction + if (p->image_desc.flags & MP_IMGFLAG_XYZ) { + cparams.colorspace = MP_CSP_XYZ; + cparams.input_bits = 8; + cparams.texture_bits = 8; + // Pre-colormatrix input gamma correction. Note that this results in + // linear light + GLSL(color.rgb *= vec3(2.6);) + } + + // Conversion from Y'CbCr or other linear spaces to RGB + if (!p->is_rgb) { struct mp_cmat m = {{{0}}}; if (p->image_desc.flags & MP_IMGFLAG_XYZ) { - // Hard-coded as relative colorimetric for now, since this transforms - // from the source file's D55 material to whatever color space our - // projector/display lives in, which should be D55 for a proper - // home cinema setup either way. - mp_get_xyz2rgb_coeffs(&cparams, p->csp_src, - MP_INTENT_RELATIVE_COLORIMETRIC, &m); + struct mp_csp_primaries csp = mp_get_csp_primaries(p->image_params.primaries); + mp_get_xyz2rgb_coeffs(&cparams, csp, MP_INTENT_RELATIVE_COLORIMETRIC, &m); } else { mp_get_yuv2rgb_coeffs(&cparams, &m); } @@ -1091,6 +1317,50 @@ static void pass_convert_yuv(struct gl_video *p) GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) } + + if (p->image_params.colorspace == MP_CSP_BT_2020_C) { + p->use_indirect = true; + // Conversion for C'rcY'cC'bc via the BT.2020 CL system: + // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 + // = (B'-Y'c) / 1.5816 | C'bc > 0 + // + // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 + // = (R'-Y'c) / 0.9936 | C'rc > 0 + // + // as per the BT.2020 specification, table 4. This is a non-linear + // transformation because (constant) luminance receives non-equal + // contributions from the three different channels. + GLSLF("// constant luminance conversion\n"); + GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), + vec2(1.9404, 1.7184), + lessThanEqual(color.br, vec2(0))) + + color.gg;) + // Expand channels to camera-linear light. This shader currently just + // assumes everything uses the BT.2020 12-bit gamma function, since the + // difference between 10 and 12-bit is negligible for anything other + // than 12-bit content. + GLSL(color.rgb = mix(color.rgb / vec3(4.5), + pow((color.rgb + vec3(0.0993))/vec3(1.0993), vec3(1.0/0.45)), + lessThanEqual(vec3(0.08145), color.rgb));) + // Calculate the green channel from the expanded RYcB + // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B + GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)/0.6780;) + // Re-compand to receive the R'G'B' result, same as other systems + GLSL(color.rgb = mix(color.rgb * vec3(4.5), + vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), + lessThanEqual(vec3(0.0181), color.rgb));) + } + + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + + if (p->user_gamma_enabled) { + p->use_indirect = true; + gl_sc_uniform_f(sc, "user_gamma", user_gamma); + GLSL(color.rgb = pow(color.rgb, vec3(1.0 / user_gamma));) + } + + if (!p->has_alpha) + GLSL(color.a = 1.0;) } static void get_scale_factors(struct gl_video *p, double xy[2]) @@ -1101,7 +1371,9 @@ static void get_scale_factors(struct gl_video *p, double xy[2]) (double)(p->src_rect.y1 - p->src_rect.y0); } -static void pass_scale_main(struct gl_video *p, bool use_indirect) +// Takes care of the main scaling and post-conversions such as gamut/gamma +// mapping or color management. +static void pass_render_main(struct gl_video *p) { // Figure out the main scaler. double xy[2]; @@ -1123,15 +1395,150 @@ static void pass_scale_main(struct gl_video *p, bool use_indirect) scale_factor = FFMAX(1.0, 1.0 / f); } + bool use_cms = p->use_lut_3d || p->opts.target_prim != MP_CSP_PRIM_AUTO + || p->opts.target_trc != MP_CSP_TRC_AUTO; + + // Pre-conversion, like linear light/sigmoidization + GLSLF("// scaler pre-conversion\n"); + bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling + || use_cms || p->image_params.gamma == MP_CSP_TRC_LINEAR; + if (use_linear) { + p->use_indirect = true; + switch (p->image_params.gamma) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb / vec3(12.92), + pow((color.rgb + vec3(0.055))/vec3(1.055), + vec3(2.4)), + lessThanEqual(vec3(0.04045), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.961));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(2.2));) + break; + } + } + + bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling; + float sig_center, sig_slope, sig_offset, sig_scale; + if (use_sigmoid) { + p->use_indirect = true; + // Coefficients for the sigmoidal transform are taken from the + // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal + sig_center = p->opts.sigmoid_center; + sig_slope = p->opts.sigmoid_slope; + // This functio |