diff options
Diffstat (limited to 'video/out/gpu/video.c')
-rw-r--r-- | video/out/gpu/video.c | 831 |
1 files changed, 472 insertions, 359 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 851289e281..1478ec4687 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -183,6 +183,7 @@ struct gl_video { struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) + struct mp_image_params target_params; // target format struct ra_imgfmt_desc ra_format; // texture format int plane_count; @@ -212,6 +213,7 @@ struct gl_video { struct ra_tex *merge_tex[4]; struct ra_tex *scale_tex[4]; struct ra_tex *integer_tex[4]; + struct ra_tex *chroma_tex[4]; struct ra_tex *indirect_tex; struct ra_tex *blend_subs_tex; struct ra_tex *error_diffusion_tex[2]; @@ -281,10 +283,7 @@ struct gl_video { struct cached_file *files; int num_files; - bool hwdec_interop_loading_done; - struct ra_hwdec **hwdecs; - int num_hwdecs; - + struct ra_hwdec_ctx hwdec_ctx; struct ra_hwdec_mapper *hwdec_mapper; struct ra_hwdec *hwdec_overlay; bool hwdec_active; @@ -298,7 +297,6 @@ struct gl_video { static const struct gl_video_opts gl_video_opts_def = { .dither_algo = DITHER_FRUIT, - .dither_depth = -1, .dither_size = 6, .temporal_dither_period = 1, .error_diffusion = "sierra-lite", @@ -306,44 +304,36 @@ static const struct gl_video_opts gl_video_opts_def = { .sigmoid_center = 0.75, .sigmoid_slope = 6.5, .scaler = { - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // scale - {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // dscale - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // cscale - {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .clamp = 1, }, // tscale + {{"lanczos", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale + {{"hermite", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale + {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale + {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale }, - 
.scaler_resizes_only = 1, - .scaler_lut_size = 6, - .interpolation_threshold = 0.0001, - .alpha_mode = ALPHA_BLEND_TILES, - .background = {0, 0, 0, 255}, + .scaler_resizes_only = true, + .correct_downscaling = true, + .linear_downscaling = true, + .sigmoid_upscaling = true, + .interpolation_threshold = 0.01, + .background = BACKGROUND_TILES, + .background_color = {0, 0, 0, 255}, .gamma = 1.0f, .tone_map = { - .curve = TONE_MAPPING_BT_2390, + .curve = TONE_MAPPING_AUTO, .curve_param = NAN, .max_boost = 1.0, - .decay_rate = 100.0, - .scene_threshold_low = 5.5, - .scene_threshold_high = 10.0, - .desat = 0.75, - .desat_exp = 1.5, - .gamut_clipping = 1, + .decay_rate = 20.0, + .scene_threshold_low = 1.0, + .scene_threshold_high = 3.0, + .contrast_smoothness = 3.5, }, .early_flush = -1, + .shader_cache = true, .hwdec_interop = "auto", }; -static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -static int validate_window_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); +static OPT_STRING_VALIDATE_FUNC(validate_scaler_opt); +static OPT_STRING_VALIDATE_FUNC(validate_window_opt); +static OPT_STRING_VALIDATE_FUNC(validate_error_diffusion_opt); #define OPT_BASE_STRUCT struct gl_video_opts @@ -357,10 +347,10 @@ static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *op {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])}, \ {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])}, \ {n"-blur", OPT_FLOAT(scaler[i].kernel.blur)}, \ - {n"-cutoff", OPT_FLOAT(scaler[i].cutoff), M_RANGE(0.0, 1.0)}, \ + {n"-cutoff", OPT_REMOVED("Hard-coded as 0.001")}, \ {n"-taper", OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)}, \ {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])}, \ - {n"-wblur", OPT_FLOAT(scaler[i].window.blur)}, \ + {n"-wblur", 
OPT_REMOVED("Just adjust filter radius directly")}, \ {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)}, \ {n"-clamp", OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)}, \ {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)}, \ @@ -371,49 +361,73 @@ const struct m_sub_options gl_video_conf = { .opts = (const m_option_t[]) { {"gpu-dumb-mode", OPT_CHOICE(dumb_mode, {"auto", 0}, {"yes", 1}, {"no", -1})}, - {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0)}, - {"gamma-auto", OPT_FLAG(gamma_auto)}, - {"target-prim", OPT_CHOICE_C(target_prim, mp_csp_prim_names)}, - {"target-trc", OPT_CHOICE_C(target_trc, mp_csp_trc_names)}, + {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0), + .deprecation_message = "no replacement"}, + {"gamma-auto", OPT_BOOL(gamma_auto), + .deprecation_message = "no replacement"}, + {"target-prim", OPT_CHOICE_C(target_prim, pl_csp_prim_names)}, + {"target-trc", OPT_CHOICE_C(target_trc, pl_csp_trc_names)}, {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}), M_RANGE(10, 10000)}, + {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}), + M_RANGE(10, 1000000)}, + {"target-gamut", OPT_CHOICE_C(target_gamut, pl_csp_prim_names)}, {"tone-mapping", OPT_CHOICE(tone_map.curve, + {"auto", TONE_MAPPING_AUTO}, {"clip", TONE_MAPPING_CLIP}, {"mobius", TONE_MAPPING_MOBIUS}, {"reinhard", TONE_MAPPING_REINHARD}, {"hable", TONE_MAPPING_HABLE}, {"gamma", TONE_MAPPING_GAMMA}, {"linear", TONE_MAPPING_LINEAR}, - {"bt.2390", TONE_MAPPING_BT_2390})}, + {"spline", TONE_MAPPING_SPLINE}, + {"bt.2390", TONE_MAPPING_BT_2390}, + {"bt.2446a", TONE_MAPPING_BT_2446A}, + {"st2094-40", TONE_MAPPING_ST2094_40}, + {"st2094-10", TONE_MAPPING_ST2094_10})}, + {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)}, + {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)}, + {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost), + M_RANGE(1.0, 10.0)}, + {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)}, + {"gamut-mapping-mode", 
OPT_CHOICE(tone_map.gamut_mode, + {"auto", GAMUT_AUTO}, + {"clip", GAMUT_CLIP}, + {"perceptual", GAMUT_PERCEPTUAL}, + {"relative", GAMUT_RELATIVE}, + {"saturation", GAMUT_SATURATION}, + {"absolute", GAMUT_ABSOLUTE}, + {"desaturate", GAMUT_DESATURATE}, + {"darken", GAMUT_DARKEN}, + {"warn", GAMUT_WARN}, + {"linear", GAMUT_LINEAR})}, {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak, {"auto", 0}, {"yes", 1}, {"no", -1})}, + {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile), + M_RANGE(0.0, 100.0)}, {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate), - M_RANGE(1.0, 1000.0)}, + M_RANGE(0.0, 1000.0)}, {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low), M_RANGE(0, 20.0)}, {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high), M_RANGE(0, 20.0)}, - {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)}, - {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost), - M_RANGE(1.0, 10.0)}, - {"tone-mapping-desaturate", OPT_FLOAT(tone_map.desat)}, - {"tone-mapping-desaturate-exponent", OPT_FLOAT(tone_map.desat_exp), - M_RANGE(0.0, 20.0)}, - {"gamut-warning", OPT_FLAG(tone_map.gamut_warning)}, - {"gamut-clipping", OPT_FLAG(tone_map.gamut_clipping)}, - {"opengl-pbo", OPT_FLAG(pbo)}, + {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery), + M_RANGE(0, 2.0)}, + {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness), + M_RANGE(1.0, 100.0)}, + {"opengl-pbo", OPT_BOOL(pbo)}, SCALER_OPTS("scale", SCALER_SCALE), SCALER_OPTS("dscale", SCALER_DSCALE), SCALER_OPTS("cscale", SCALER_CSCALE), SCALER_OPTS("tscale", SCALER_TSCALE), - {"scaler-lut-size", OPT_INT(scaler_lut_size), M_RANGE(4, 10)}, - {"scaler-resizes-only", OPT_FLAG(scaler_resizes_only)}, - {"correct-downscaling", OPT_FLAG(correct_downscaling)}, - {"linear-downscaling", OPT_FLAG(linear_downscaling)}, - {"linear-upscaling", OPT_FLAG(linear_upscaling)}, - {"sigmoid-upscaling", OPT_FLAG(sigmoid_upscaling)}, + {"scaler-lut-size", OPT_REMOVED("hard-coded as 
8")}, + {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)}, + {"correct-downscaling", OPT_BOOL(correct_downscaling)}, + {"linear-downscaling", OPT_BOOL(linear_downscaling)}, + {"linear-upscaling", OPT_BOOL(linear_upscaling)}, + {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)}, {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)}, {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)}, {"fbo-format", OPT_STRING(fbo_format)}, @@ -425,19 +439,18 @@ const struct m_sub_options gl_video_conf = { {"error-diffusion", DITHER_ERROR_DIFFUSION}, {"no", DITHER_NONE})}, {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)}, - {"temporal-dither", OPT_FLAG(temporal_dither)}, + {"temporal-dither", OPT_BOOL(temporal_dither)}, {"temporal-dither-period", OPT_INT(temporal_dither_period), M_RANGE(1, 128)}, {"error-diffusion", OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)}, - {"alpha", OPT_CHOICE(alpha_mode, - {"no", ALPHA_NO}, - {"yes", ALPHA_YES}, - {"blend", ALPHA_BLEND}, - {"blend-tiles", ALPHA_BLEND_TILES})}, - {"opengl-rectangle-textures", OPT_FLAG(use_rectangle)}, - {"background", OPT_COLOR(background)}, - {"interpolation", OPT_FLAG(interpolation)}, + {"background", OPT_CHOICE(background, + {"none", BACKGROUND_NONE}, + {"color", BACKGROUND_COLOR}, + {"tiles", BACKGROUND_TILES})}, + {"opengl-rectangle-textures", OPT_BOOL(use_rectangle)}, + {"background-color", OPT_COLOR(background_color)}, + {"interpolation", OPT_BOOL(interpolation)}, {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)}, {"blend-subtitles", OPT_CHOICE(blend_subs, {"no", BLEND_SUBS_NO}, @@ -445,32 +458,28 @@ const struct m_sub_options gl_video_conf = { {"video", BLEND_SUBS_VIDEO})}, {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE}, {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")}, - {"deband", OPT_FLAG(deband)}, + {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)}, + {"deband", OPT_BOOL(deband)}, {"deband", 
OPT_SUBSTRUCT(deband_opts, deband_conf)}, {"sharpen", OPT_FLOAT(unsharp)}, {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)}, {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)}, {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)}, + {"gpu-shader-cache", OPT_BOOL(shader_cache)}, {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE}, {"gpu-hwdec-interop", OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)}, - {"opengl-hwdec-interop", OPT_REPLACED("gpu-hwdec-interop")}, - {"hwdec-preload", OPT_REPLACED("opengl-hwdec-interop")}, - {"hdr-tone-mapping", OPT_REPLACED("tone-mapping")}, - {"opengl-shaders", OPT_REPLACED("glsl-shaders")}, - {"opengl-shader", OPT_REPLACED("glsl-shader")}, - {"opengl-shader-cache-dir", OPT_REPLACED("gpu-shader-cache-dir")}, - {"opengl-tex-pad-x", OPT_REPLACED("gpu-tex-pad-x")}, - {"opengl-tex-pad-y", OPT_REPLACED("gpu-tex-pad-y")}, - {"opengl-fbo-format", OPT_REPLACED("fbo-format")}, - {"opengl-dumb-mode", OPT_REPLACED("gpu-dumb-mode")}, - {"opengl-gamma", OPT_REPLACED("gamma-factor")}, - {"linear-scaling", OPT_REMOVED("Split into --linear-upscaling and " - "--linear-downscaling")}, + {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")}, + {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")}, + {"tone-mapping-desaturate", OPT_REMOVED("Replaced by --tone-mapping-mode")}, + {"tone-mapping-desaturate-exponent", OPT_REMOVED("Replaced by --tone-mapping-mode")}, + {"tone-mapping-crosstalk", OPT_REMOVED("Hard-coded as 0.04")}, + {"tone-mapping-mode", OPT_REMOVED("no replacement")}, {0} }, .size = sizeof(struct gl_video_opts), .defaults = &gl_video_opts_def, + .change_flags = UPDATE_VIDEO, }; static void uninit_rendering(struct gl_video *p); @@ -566,6 +575,7 @@ static void uninit_rendering(struct gl_video *p) ra_tex_free(p->ra, &p->merge_tex[n]); ra_tex_free(p->ra, &p->scale_tex[n]); ra_tex_free(p->ra, &p->integer_tex[n]); + ra_tex_free(p->ra, &p->chroma_tex[n]); } 
ra_tex_free(p->ra, &p->indirect_tex); @@ -593,15 +603,6 @@ bool gl_video_gamma_auto_enabled(struct gl_video *p) return p->opts.gamma_auto; } -struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) -{ - return (struct mp_colorspace) { - .primaries = p->opts.target_prim, - .gamma = p->opts.target_trc, - .sig_peak = p->opts.target_peak / MP_REF_WHITE, - }; -} - // Warning: profile.start must point to a ta allocation, and the function // takes over ownership. void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) @@ -615,8 +616,8 @@ bool gl_video_icc_auto_enabled(struct gl_video *p) return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; } -static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, - enum mp_csp_trc trc) +static bool gl_video_get_lut3d(struct gl_video *p, enum pl_color_primaries prim, + enum pl_color_transfer trc) { if (!p->use_lut_3d) return false; @@ -664,6 +665,11 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, talloc_free(lut3d); + if (!p->lut_3d_texture) { + p->use_lut_3d = false; + return false; + } + return true; } @@ -754,16 +760,16 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; - if (p->image_params.chroma_location != MP_CHROMA_CENTER) { - int cx, cy; - mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); + if (p->image_params.chroma_location != PL_CHROMA_CENTER) { + float cx, cy; + pl_chroma_location_offset(p->image_params.chroma_location, &cx, &cy); // By default texture coordinates are such that chroma is centered with // any chroma subsampling. If a specific direction is given, make it // so that the luma and chroma sample line up exactly. // For 4:4:4, setting chroma location should have no effect at all. // luma sample size (in chroma coord. space) - chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - chroma.t[1] = ls_h < 1 ? 
ls_h * -cy / 2 : 0; + chroma.t[0] = ls_w < 1 ? ls_w * -cx : 0; + chroma.t[1] = ls_h < 1 ? ls_h * -cy : 0; } memset(img, 0, 4 * sizeof(img[0])); @@ -779,9 +785,9 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, ctype = PLANE_NONE; } else if (c == 4) { ctype = PLANE_ALPHA; - } else if (p->image_params.color.space == MP_CSP_RGB) { + } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_RGB) { ctype = PLANE_RGB; - } else if (p->image_params.color.space == MP_CSP_XYZ) { + } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_XYZ) { ctype = PLANE_XYZ; } else { ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA; @@ -793,7 +799,7 @@ static void pass_get_images(struct gl_video *p, struct video_image *vimg, int msb_valid_bits = p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); - int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space; + int csp = type == PLANE_ALPHA ? PL_COLOR_SYSTEM_RGB : p->image_params.repr.sys; float tex_mul = 1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits); if (p->ra_format.component_type == RA_CTYPE_FLOAT) @@ -867,14 +873,7 @@ static void init_video(struct gl_video *p) { p->use_integer_conversion = false; - struct ra_hwdec *hwdec = NULL; - for (int n = 0; n < p->num_hwdecs; n++) { - if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) { - hwdec = p->hwdecs[n]; - break; - } - } - + struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt); if (hwdec) { if (hwdec->driver->overlay_frame) { MP_WARN(p, "Using HW-overlay mode. 
No GL filtering is performed " @@ -952,9 +951,6 @@ static void init_video(struct gl_video *p) params.w, params.h); plane->tex = ra_tex_create(p->ra, &params); - if (!plane->tex) - abort(); // shit happens - p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; } } @@ -1058,13 +1054,13 @@ static void uninit_video(struct gl_video *p) ra_hwdec_mapper_free(&p->hwdec_mapper); } -static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +static void pass_record(struct gl_video *p, const struct mp_pass_perf *perf) { if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) return; struct pass_info *pass = &p->pass[p->pass_idx]; - pass->perf = perf; + pass->perf = *perf; if (pass->desc.len == 0) bstr_xappend(p, &pass->desc, bstr0("(unknown)")); @@ -1191,16 +1187,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h, if (!s->tex) continue; - // We need to rescale the coordinates to the true texture size - char *tex_scale = mp_tprintf(32, "tex_scale%d", n); - gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){ - (float)s->w / s->tex->params.w, - (float)s->h / s->tex->params.h, - }); - - PRELUDE("#define texmap%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); - PRELUDE("#define texmap%d(id) (texture_rot%d * texmap%d_raw(id) + " - "pixel_size%d * texture_off%d)\n", n, n, n, n, n); + PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + " + "pixel_size%d * texture_off%d)\n", n, n, n, n); PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); } @@ -1212,12 +1200,13 @@ static void dispatch_compute(struct gl_video *p, int w, int h, if (!(p->ra->caps & RA_CAP_NUM_GROUPS)) PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y); - pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); + struct mp_pass_perf perf = gl_sc_dispatch_compute(p->sc, num_x, num_y, 1); + pass_record(p, &perf); cleanup_binds(p); } static struct mp_pass_perf render_pass_quad(struct gl_video *p, - struct ra_fbo fbo, bool discard, + const struct 
ra_fbo *fbo, bool discard, const struct mp_rect *dst) { // The first element is reserved for `vec2 position` @@ -1275,15 +1264,16 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p, &p->tmp_vertex[num_vertex_attribs * 1], vertex_stride); - return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs, + return gl_sc_dispatch_draw(p->sc, fbo->tex, discard, p->vao, num_vertex_attribs, vertex_stride, p->tmp_vertex, num_vertices); } -static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo, +static void finish_pass_fbo(struct gl_video *p, const struct ra_fbo *fbo, bool discard, const struct mp_rect *dst) { pass_prepare_src_tex(p); - pass_record(p, render_pass_quad(p, fbo, discard, dst)); + struct mp_pass_perf perf = render_pass_quad(p, fbo, discard, dst); + pass_record(p, &perf); debug_check_gl(p, "after rendering"); cleanup_binds(p); } @@ -1320,7 +1310,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, debug_check_gl(p, "after dispatching compute shader"); } else { struct ra_fbo fbo = { .tex = *dst_tex, }; - finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h}); + finish_pass_fbo(p, &fbo, true, &(struct mp_rect){0, 0, w, h}); } } @@ -1333,18 +1323,20 @@ static const char *get_tex_swizzle(struct image *img) // Copy a texture to the vec4 color, while increasing offset. 
Also applies // the texture multiplier to the sampled color -static void copy_image(struct gl_video *p, int *offset, struct image img) +static void copy_image(struct gl_video *p, unsigned int *offset, struct image img) { - int count = img.components; - assert(*offset + count <= 4); - assert(img.padding + count <= 4); - - int id = pass_bind(p, img); + const unsigned int count = img.components; char src[5] = {0}; char dst[5] = {0}; + + assert(*offset + count < sizeof(dst)); + assert(img.padding + count < sizeof(src)); + + int id = pass_bind(p, img); + const char *tex_fmt = get_tex_swizzle(&img); const char *dst_fmt = "rgba"; - for (int i = 0; i < count; i++) { + for (unsigned int i = 0; i < count; i++) { src[i] = tex_fmt[img.padding + i]; dst[i] = dst_fmt[*offset + i]; } @@ -1403,6 +1395,11 @@ static void hook_prelude(struct gl_video *p, const char *name, int id, GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", name, name, name, crap); + if (p->ra->caps & RA_CAP_GATHER) { + GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4(" + "textureGather(%s_raw, pos, c)))\n", name, name, name); + } + // Since the extra matrix multiplication impacts performance, // skip it unless the texture was actually rotated if (gl_transform_eq(img.transform, identity_trans)) { @@ -1696,6 +1693,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, double scale_factor, int sizes[]) { + assert(conf); if (scaler_conf_eq(scaler->conf, *conf) && scaler->scale_factor == scale_factor && scaler->initialized) @@ -1703,9 +1701,29 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, uninit_scaler(p, scaler); + if (scaler->index == SCALER_DSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + if (scaler->index == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + struct filter_kernel bare_window; const struct filter_kernel 
*t_kernel = mp_find_filter_kernel(conf->kernel.name); const struct filter_window *t_window = mp_find_filter_window(conf->window.name); bool is_tscale = scaler->index == SCALER_TSCALE; + if (!t_kernel) { + const struct filter_window *window = mp_find_filter_window(conf->kernel.name); + if (window) { + bare_window = (struct filter_kernel) { .f = *window }; + t_kernel = &bare_window; + } + } scaler->conf = *conf; scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); @@ -1747,8 +1765,6 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, scaler->kernel->f.radius = conf->radius; scaler->kernel->clamp = conf->clamp; - scaler->kernel->value_cutoff = conf->cutoff; - scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor); int size = scaler->kernel->size; @@ -1760,17 +1776,16 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, int stride = width * num_components; assert(size <= stride); - scaler->lut_size = 1 << p->opts.scaler_lut_size; - - float *weights = talloc_array(NULL, float, scaler->lut_size * stride); - mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights); + static const int lut_size = 256; + float *weights = talloc_array(NULL, float, lut_size * stride); + mp_compute_lut(scaler->kernel, lut_size, stride, weights); bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D); struct ra_tex_params lut_params = { .dimensions = use_1d ? 1 : 2, - .w = use_1d ? scaler->lut_size : width, - .h = use_1d ? 1 : scaler->lut_size, + .w = use_1d ? lut_size : width, + .h = use_1d ? 1 : lut_size, .d = 1, .format = fmt, .render_src = true, @@ -1900,8 +1915,7 @@ static void pass_sample(struct gl_video *p, struct image img, } else if (scaler->kernel) { pass_sample_separated(p, img, scaler, w, h); } else { - // Should never happen - abort(); + MP_ASSERT_UNREACHABLE(); // should never happen } // Apply any required multipliers. 
Separated scaling already does this in @@ -1932,7 +1946,7 @@ static void deband_hook(struct gl_video *p, struct image img, { pass_describe(p, "debanding (%s)", plane_names[img.type]); pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, - p->image_params.color.gamma); + p->image_params.color.transfer); } static void unsharp_hook(struct gl_video *p, struct image img, @@ -2023,25 +2037,23 @@ static void user_hook(struct gl_video *p, struct image img, gl_transform_trans(shader->offset, trans); } -static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) +static bool add_user_hook(void *priv, const struct gl_user_shader_hook *hook) { struct gl_video *p = priv; - struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy); - *copy = hook; - + struct gl_user_shader_hook *copy = talloc_dup(p, (struct gl_user_shader_hook *)hook); struct tex_hook texhook = { - .save_tex = bstrdup0(copy, hook.save_tex), - .components = hook.components, - .align_offset = hook.align_offset, + .save_tex = bstrdup0(copy, copy->save_tex), + .components = copy->components, + .align_offset = copy->align_offset, .hook = user_hook, .cond = user_hook_cond, .priv = copy, }; for (int h = 0; h < SHADER_MAX_HOOKS; h++) - texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]); + texhook.hook_tex[h] = bstrdup0(copy, copy->hook_tex[h]); for (int h = 0; h < SHADER_MAX_BINDS; h++) - texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); + texhook.bind_tex[h] = bstrdup0(copy, copy->bind_tex[h]); MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook); return true; @@ -2190,6 +2202,23 @@ static void pass_read_video(struct gl_video *p) } } + // If chroma textures are in a subsampled semi-planar format and rotated, + // introduce an explicit conversion pass to avoid breaking chroma scalers. 
+ for (int n = 0; n < 4; n++) { + if (img[n].tex && img[n].type == PLANE_CHROMA && + img[n].tex->params.format->num_components == 2 && + p->image_params.rotate % 180 == 90 && + p->ra_format.chroma_w != 1) + { + GLSLF("// chroma fix for rotated plane %d\n", n); + copy_image(p, &(int){0}, img[n]); + pass_describe(p, "chroma fix for rotated plane"); + finish_pass_tex(p, &p->chroma_tex[n], img[n].w, img[n].h); + img[n] = image_wrap(p->chroma_tex[n], img[n].type, + img[n].components); + } + } + // At this point all planes are finalized but they may not be at the // required size yet. Furthermore, they may have texture offsets that // require realignment. @@ -2265,6 +2294,13 @@ static void pass_read_video(struct gl_video *p) continue; const struct scaler_config *conf = &p->opts.scaler[scaler_id]; + + if (scaler_id == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + struct scaler *scaler = &p->scaler[scaler_id]; // bilinear scaling is a free no-op thanks to GPU sampling @@ -2315,22 +2351,29 @@ static void pass_convert_yuv(struct gl_video *p) GLSLF("color = color.%s;\n", p->color_swizzle); // Pre-colormatrix input gamma correction - if (cparams.color.space == MP_CSP_XYZ) - GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light + if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) + pass_linearize(p->sc, p->image_params.color.transfer); // We always explicitly normalize the range in pass_read_video cparams.input_bits = cparams.texture_bits = 0; // Conversion to RGB. For RGB itself, this still applies e.g. brightness // and contrast controls, or expansion of e.g. LSB-packed 10 bit data. 
- struct mp_cmat m = {{{0}}}; + struct pl_transform3x3 m = {0}; mp_get_csp_matrix(&cparams, &m); - gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]); + gl_sc_uniform_mat3(sc, "colormatrix", true, &m.mat.m[0][0]); gl_sc_uniform_vec3(sc, "colormatrix_c", m.c); GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) - if (p->image_params.color.space == MP_CSP_BT_2020_C) { + if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) { + pass_delinearize(p->sc, p->image_params.color.transfer); + // mp_get_csp_matrix implicitly converts XYZ to DCI-P3 + p->image_params.repr.sys = PL_COLOR_SYSTEM_RGB; + p->image_params.color.primaries = PL_COLOR_PRIM_DCI_P3; + } + + if (p->image_params.repr.sys == PL_COLOR_SYSTEM_BT_2020_C) { // Conversion for C'rcY'cC'bc via the BT.2020 CL system: // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 // = (B'-Y'c) / 1.5816 | C'bc > 0 @@ -2341,32 +2384,35 @@ static void pass_convert_yuv(struct gl_video *p) // as per the BT.2020 specification, table 4. This is a non-linear // transformation because (constant) luminance receives non-equal // contributions from the three different channels. - GLSLF("// constant luminance conversion\n"); - GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), - vec2(1.9404, 1.7184), - lessThanEqual(color.br, vec2(0))) - + color.gg;) + GLSLF("// constant luminance conversion \n" + "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" + " vec2(1.9404, 1.7184), \n" + " %s(lessThanEqual(color.br, vec2(0))))\n" + " + color.gg; \n", + gl_sc_bvec(p->sc, 2)); // Expand channels to camera-linear light. This shader currently just // assumes everything uses the BT.2020 12-bit gamma function, since the // difference between 10 and 12-bit is negligible for anything other // than 12-bit content. 
- GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), - pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), - vec3(1.0/0.45)), - lessThanEqual(vec3(0.08145), color.rgb));) + GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" + " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" + " vec3(1.0/0.45)), \n" + " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n", + gl_sc_bvec(p->sc, 3)); // Calculate the green channel from the expanded RYcB // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;) // Recompress to receive the R'G'B' result, same as other systems - GLSL(color.rgb = mix(color.rgb * vec3(4.5), - vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), - lessThanEqual(vec3(0.0181), color.rgb));) + GLSLF("color.rgb = mix(color.rgb * vec3(4.5), \n" + " vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n" + " %s(lessThanEqual(vec3(0.0181), color.rgb))); \n", + gl_sc_bvec(p->sc, 3)); } p->components = 3; - if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) { + if (!p->has_alpha) { GLSL(color.a = 1.0;) - } else if (p->image_params.alpha == MP_ALPHA_PREMUL) { + } else if (p->image_params.repr.alpha == PL_ALPHA_PREMULTIPLIED) { p->components = 4; } else { p->components = 4; @@ -2451,7 +2497,7 @@ static void pass_scale_main(struct gl_video *p) // Linear light downscaling results in nasty artifacts for HDR curves // due to the potentially extreme brightness differences severely // compounding any ringing. So just scale in gamma light instead. 
- if (mp_trc_is_hdr(p->image_params.color.gamma)) + if (pl_color_space_is_hdr(&p->image_params.color)) use_linear = false; } else if (upscaling) { use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling; @@ -2459,7 +2505,7 @@ static void pass_scale_main(struct gl_video *p) if (use_linear) { p->use_linear = true; - pass_linearize(p->sc, p->image_params.color.gamma); + pass_linearize(p->sc, p->image_params.color.transfer); pass_opt_hook_point(p, "LINEAR", NULL); } @@ -2512,8 +2558,9 @@ static void pass_scale_main(struct gl_video *p) // rendering) // If OSD is true, ignore any changes that may have been made to the video // by previous passes (i.e. linear scaling) -static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, - struct mp_colorspace fbo_csp, bool osd) +static void pass_colormanage(struct gl_video *p, struct pl_color_space src, + enum mp_csp_light src_light, + struct pl_color_space fbo_csp, int flags, bool osd) { struct ra *ra = p->ra; @@ -2521,18 +2568,17 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, // unless specific transfer function, primaries or target peak // is set. If values are set to _AUTO, the most likely intended // values are guesstimated later in this function. - struct mp_colorspace dst = { - .gamma = p->opts.target_trc == MP_CSP_TRC_AUTO ? - fbo_csp.gamma : p->opts.target_trc, - .primaries = p->opts.target_prim == MP_CSP_PRIM_AUTO ? + struct pl_color_space dst = { + .transfer = p->opts.target_trc == PL_COLOR_TRC_UNKNOWN ? + fbo_csp.transfer : p->opts.target_trc, + .primaries = p->opts.target_prim == PL_COLOR_PRIM_UNKNOWN ? fbo_csp.primaries : p->opts.target_prim, - .light = MP_CSP_LIGHT_DISPLAY, - .sig_peak = !p->opts.target_peak ? - fbo_csp.sig_peak : p->opts.target_peak / MP_REF_WHITE, + .hdr.max_luma = !p->opts.target_peak ? 
+ fbo_csp.hdr.max_luma : p->opts.target_peak, }; if (!p->colorspace_override_warned && - ((fbo_csp.gamma && dst.gamma != fbo_csp.gamma) || + ((fbo_csp.transfer && dst.transfer != fbo_csp.transfer) || (fbo_csp.primaries && dst.primaries != fbo_csp.primaries))) { MP_WARN(p, "One or more colorspace value is being overridden " @@ -2540,44 +2586,44 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, "transfer function: (dst: %s, fbo: %s), " "primaries: (dst: %s, fbo: %s). " "Rendering can lead to incorrect results!\n", - m_opt_choice_str(mp_csp_trc_names, dst.gamma), - m_opt_choice_str(mp_csp_trc_names, fbo_csp.gamma), - m_opt_choice_str(mp_csp_prim_names, dst.primaries), - m_opt_choice_str(mp_csp_prim_names, fbo_csp.primaries)); + m_opt_choice_str(pl_csp_trc_names, dst.transfer), + m_opt_choice_str(pl_csp_trc_names, fbo_csp.transfer), + m_opt_choice_str(pl_csp_prim_names, dst.primaries), + m_opt_choice_str(pl_csp_prim_names, fbo_csp.primaries)); p->colorspace_override_warned = true; } - if (dst.gamma == MP_CSP_TRC_HLG) - dst.light = MP_CSP_LIGHT_SCENE_HLG; + enum mp_csp_light dst_light = dst.transfer == PL_COLOR_TRC_HLG ? + MP_CSP_LIGHT_SCENE_HLG : MP_CSP_LIGHT_DISPLAY; - if (p->use_lut_3d) { + if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) { // The 3DLUT is always genera |