diff options
Diffstat (limited to 'video/out/gpu')
-rw-r--r-- | video/out/gpu/context.c | 8 | ||||
-rw-r--r-- | video/out/gpu/libmpv_gpu.c | 15 | ||||
-rw-r--r-- | video/out/gpu/ra.h | 1 | ||||
-rw-r--r-- | video/out/gpu/shader_cache.c | 3 | ||||
-rw-r--r-- | video/out/gpu/spirv.c | 10 | ||||
-rw-r--r-- | video/out/gpu/spirv_shaderc.c | 2 | ||||
-rw-r--r-- | video/out/gpu/utils.c | 24 | ||||
-rw-r--r-- | video/out/gpu/video.c | 51 | ||||
-rw-r--r-- | video/out/gpu/video.h | 3 | ||||
-rw-r--r-- | video/out/gpu/video_shaders.c | 30 |
10 files changed, 86 insertions, 61 deletions
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c index 36f9c2dad5..85f1aa7667 100644 --- a/video/out/gpu/context.c +++ b/video/out/gpu/context.c @@ -62,7 +62,7 @@ static const struct ra_ctx_fns *contexts[] = { #endif // OpenGL contexts: -#if HAVE_ANDROID +#if HAVE_EGL_ANDROID &ra_ctx_android, #endif #if HAVE_RPI @@ -80,6 +80,9 @@ static const struct ra_ctx_fns *contexts[] = { #if HAVE_GL_DXINTEROP &ra_ctx_dxgl, #endif +#if HAVE_GL_WAYLAND + &ra_ctx_wayland_egl, +#endif #if HAVE_GL_X11 &ra_ctx_glx_probe, #endif @@ -89,9 +92,6 @@ static const struct ra_ctx_fns *contexts[] = { #if HAVE_GL_X11 &ra_ctx_glx, #endif -#if HAVE_GL_WAYLAND - &ra_ctx_wayland_egl, -#endif #if HAVE_EGL_DRM &ra_ctx_drm_egl, #endif diff --git a/video/out/gpu/libmpv_gpu.c b/video/out/gpu/libmpv_gpu.c index fce2acfa4d..5ca4ebb7ca 100644 --- a/video/out/gpu/libmpv_gpu.c +++ b/video/out/gpu/libmpv_gpu.c @@ -36,9 +36,9 @@ static const struct native_resource_entry native_resource_map[] = { .name = "drm_params", .size = sizeof (mpv_opengl_drm_params), }, - [MPV_RENDER_PARAM_DRM_OSD_SIZE] = { - .name = "drm_osd_size", - .size = sizeof (mpv_opengl_drm_osd_size), + [MPV_RENDER_PARAM_DRM_DRAW_SURFACE_SIZE] = { + .name = "drm_draw_surface_size", + .size = sizeof (mpv_opengl_drm_draw_surface_size), }, }; @@ -207,6 +207,14 @@ static void screenshot(struct render_backend *ctx, struct vo_frame *frame, gl_video_screenshot(p->renderer, frame, args); } +static void perfdata(struct render_backend *ctx, + struct voctrl_performance_data *out) +{ + struct priv *p = ctx->priv; + + gl_video_perfdata(p->renderer, out); +} + static void destroy(struct render_backend *ctx) { struct priv *p = ctx->priv; @@ -235,5 +243,6 @@ const struct render_backend_fns render_backend_gpu = { .render = render, .get_image = get_image, .screenshot = screenshot, + .perfdata = perfdata, .destroy = destroy, }; diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h index 79caacc919..748b485c95 100644 --- a/video/out/gpu/ra.h +++ 
b/video/out/gpu/ra.h @@ -188,6 +188,7 @@ enum ra_buf_type { RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage) + RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API }; struct ra_buf_params { diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c index f38f0a49fc..fa4560597f 100644 --- a/video/out/gpu/shader_cache.c +++ b/video/out/gpu/shader_cache.c @@ -666,8 +666,7 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) struct sc_uniform *u = &sc->uniforms[n]; if (u->type != SC_UNIFORM_TYPE_PUSHC) continue; - // push constants don't support explicit offsets - ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type, + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, u->input.name); } ADD(dst, "};\n"); diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c index e20fbe7483..ee11d601a3 100644 --- a/video/out/gpu/spirv.c +++ b/video/out/gpu/spirv.c @@ -5,22 +5,17 @@ #include "config.h" extern const struct spirv_compiler_fns spirv_shaderc; -extern const struct spirv_compiler_fns spirv_nvidia_builtin; // in probe-order enum { SPIRV_AUTO = 0, SPIRV_SHADERC, // generally preferred, but not packaged everywhere - SPIRV_NVIDIA, // can be useful for testing, only available on nvidia }; static const struct spirv_compiler_fns *compilers[] = { #if HAVE_SHADERC [SPIRV_SHADERC] = &spirv_shaderc, #endif -#if HAVE_VULKAN - [SPIRV_NVIDIA] = &spirv_nvidia_builtin, -#endif }; static const struct m_opt_choice_alternatives compiler_choices[] = { @@ -28,9 +23,6 @@ static const struct m_opt_choice_alternatives compiler_choices[] = { #if HAVE_SHADERC {"shaderc", SPIRV_SHADERC}, #endif -#if HAVE_VULKAN - {"nvidia", SPIRV_NVIDIA}, -#endif {0} }; @@ -65,7 +57,7 @@ bool spirv_compiler_init(struct ra_ctx *ctx) ctx->spirv->fns = compilers[i]; const char *name = 
m_opt_choice_str(compiler_choices, i); - strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name)); + strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name) - 1); MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name); if (ctx->spirv->fns->init(ctx)) return true; diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c index ee702053d5..f285631f14 100644 --- a/video/out/gpu/spirv_shaderc.c +++ b/video/out/gpu/spirv_shaderc.c @@ -32,7 +32,7 @@ static bool shaderc_init(struct ra_ctx *ctx) goto error; shaderc_compile_options_set_optimization_level(p->opts, - shaderc_optimization_level_size); + shaderc_optimization_level_performance); if (ctx->opts.debug) shaderc_compile_options_set_generate_debug_info(p->opts); diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c index 078a31c895..9234545a71 100644 --- a/video/out/gpu/utils.c +++ b/video/out/gpu/utils.c @@ -141,16 +141,17 @@ struct ra_layout std140_layout(struct ra_renderpass_input *inp) // the nearest multiple of vec4 // 4. Matrices are treated like arrays of vectors // 5. 
Arrays/matrices are laid out with a stride equal to the alignment - size_t size = el_size * inp->dim_v; + size_t stride = el_size * inp->dim_v; + size_t align = stride; if (inp->dim_v == 3) - size += el_size; + align += el_size; if (inp->dim_m > 1) - size = MP_ALIGN_UP(size, sizeof(float[4])); + stride = align = MP_ALIGN_UP(stride, sizeof(float[4])); return (struct ra_layout) { - .align = size, - .stride = size, - .size = size * inp->dim_m, + .align = align, + .stride = stride, + .size = stride * inp->dim_m, }; } @@ -160,14 +161,15 @@ struct ra_layout std430_layout(struct ra_renderpass_input *inp) // std430 packing rules: like std140, except arrays/matrices are always // "tightly" packed, even arrays/matrices of vec3s - size_t size = el_size * inp->dim_v; + size_t stride = el_size * inp->dim_v; + size_t align = stride; if (inp->dim_v == 3 && inp->dim_m == 1) - size += el_size; + align += el_size; return (struct ra_layout) { - .align = size, - .stride = size, - .size = size * inp->dim_m, + .align = align, + .stride = stride, + .size = stride * inp->dim_m, }; } diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 46d9026742..13e5b06918 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -373,8 +373,9 @@ const struct m_sub_options gl_video_conf = { SCALER_OPTS("tscale", SCALER_TSCALE), OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10), OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), - OPT_FLAG("linear-scaling", linear_scaling, 0), OPT_FLAG("correct-downscaling", correct_downscaling, 0), + OPT_FLAG("linear-downscaling", linear_downscaling, 0), + OPT_FLAG("linear-upscaling", linear_upscaling, 0), OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), @@ -423,6 +424,8 @@ const struct m_sub_options gl_video_conf = { OPT_REPLACED("opengl-fbo-format", "fbo-format"), OPT_REPLACED("opengl-dumb-mode", 
"gpu-dumb-mode"), OPT_REPLACED("opengl-gamma", "gamma-factor"), + OPT_REMOVED("linear-scaling", "Split into --linear-upscaling and " + "--linear-downscaling"), {0} }, .size = sizeof(struct gl_video_opts), @@ -1103,8 +1106,14 @@ static void cleanup_binds(struct gl_video *p) // Sets the appropriate compute shader metadata for an implicit compute pass // bw/bh: block size -static void pass_is_compute(struct gl_video *p, int bw, int bh) +static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible) { + if (p->pass_compute.active && flexible) { + // Avoid overwriting existing block sizes when using a flexible pass + bw = p->pass_compute.block_w; + bh = p->pass_compute.block_h; + } + p->pass_compute = (struct compute_info){ .active = true, .block_w = bw, @@ -1248,7 +1257,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders // over fragment shaders wherever possible. if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE)) - pass_is_compute(p, 16, 16); + pass_is_compute(p, 16, 16, true); if (p->pass_compute.active) { gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex); @@ -1744,7 +1753,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler if (shmem_req > p->ra->max_shmem) goto fallback; - pass_is_compute(p, bw, bh); + pass_is_compute(p, bw, bh, false); pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih); return; @@ -2326,13 +2335,18 @@ static void pass_scale_main(struct gl_video *p) // Pre-conversion, like linear light/sigmoidization GLSLF("// scaler pre-conversion\n"); - bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling; + bool use_linear = false; + if (downscaling) { + use_linear = p->opts.linear_downscaling; - // Linear light downscaling results in nasty artifacts for HDR curves due - // to the potentially extreme brightness differences severely compounding - // any ringing. 
So just scale in gamma light instead. - if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling) - use_linear = false; + // Linear light downscaling results in nasty artifacts for HDR curves + // due to the potentially extreme brightness differences severely + // compounding any ringing. So just scale in gamma light instead. + if (mp_trc_is_hdr(p->image_params.color.gamma)) + use_linear = false; + } else if (upscaling) { + use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling; + } if (use_linear) { p->use_linear = true; @@ -2485,7 +2499,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool if (detect_peak) { pass_describe(p, "detect HDR peak"); - pass_is_compute(p, 8, 8); // 8x8 is good for performance + pass_is_compute(p, 8, 8, true); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, "uint counter;" "uint frame_idx;" @@ -3488,9 +3502,9 @@ static bool check_dumb_mode(struct gl_video *p) return false; // otherwise, use auto-detection - if (o->target_prim || o->target_trc || o->linear_scaling || - o->correct_downscaling || o->sigmoid_upscaling || o->interpolation || - o->blend_subs || o->deband || o->unsharp) + if (o->target_prim || o->target_trc || o->correct_downscaling || + o->linear_downscaling || o->linear_upscaling || o->sigmoid_upscaling || + o->interpolation || o->blend_subs || o->deband || o->unsharp) return false; // check remaining scalers (tscale is already implicitly excluded above) for (int i = 0; i < SCALER_COUNT; i++) { @@ -3519,7 +3533,7 @@ static void check_gl_features(struct gl_video *p) bool have_ssbo = ra->caps & RA_CAP_BUF_RW; bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD; - const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf", + const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16", "rgb10_a2", "rgba8", 0}; const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") @@ 
-3646,8 +3660,11 @@ static void check_gl_features(struct gl_video *p) p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; // mix() is needed for some gamma functions - if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { - p->opts.linear_scaling = false; + if (!have_mglsl && (p->opts.linear_downscaling || + p->opts.linear_upscaling || p->opts.sigmoid_upscaling)) + { + p->opts.linear_downscaling = false; + p->opts.linear_upscaling = false; p->opts.sigmoid_upscaling = false; MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n"); } diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index 2184599582..ca8b6f65d4 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -112,8 +112,9 @@ struct gl_video_opts { float tone_mapping_param; float tone_mapping_desat; int gamut_warning; - int linear_scaling; int correct_downscaling; + int linear_downscaling; + int linear_upscaling; int sigmoid_upscaling; float sigmoid_center; float sigmoid_slope; diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 19fb0ccde8..342fb39ded 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -655,18 +655,6 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, GLSLF("float sig_peak = %f;\n", src_peak); GLSLF("float sig_avg = %f;\n", sdr_avg); - // Desaturate the color using a coefficient dependent on the signal - // Do this before peak detection in order to try and reclaim as much - // dynamic range as possible. 
- if (desat > 0) { - float base = 0.18 * dst_peak; - GLSL(float luma = dot(dst_luma, color.rgb);) - GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base); - GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat); - GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);) - GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig` - } - if (detect_peak) hdr_update_peak(sc); @@ -683,6 +671,18 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, GLSL(sig *= slope;) GLSL(sig_peak *= slope;) + // Desaturate the color using a coefficient dependent on the signal. + // Do this after peak detection in order to prevent over-desaturating + // overly bright sources + if (desat > 0) { + float base = 0.18 * dst_peak; + GLSL(float luma = dot(dst_luma, color.rgb);) + GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base); + GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat); + GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);) + GLSL(sig = mix(sig, luma * slope, coeff);) // also make sure to update `sig` + } + switch (algo) { case TONE_MAPPING_CLIP: GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); @@ -833,10 +833,14 @@ void pass_color_map(struct gl_shader_cache *sc, // Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. // Obtain random numbers by calling rand(h), followed by h = permute(h) to // update the state. Assumes the texture was hooked. +// permute() was modified from the original to avoid "large" numbers in +// calculations, since low-end mobile GPUs choke on them (overflow). 
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) { GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) - GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); }) + GLSLHF("float permute(float x) {\n"); + GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );) + GLSLHF("}\n"); GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) // Initialize the PRNG by hashing the position + a random uniform |