summary refs log tree commit diff stats
path: root/video/out/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/gpu')
-rw-r--r-- video/out/gpu/context.c 8
-rw-r--r-- video/out/gpu/libmpv_gpu.c 15
-rw-r--r-- video/out/gpu/ra.h 1
-rw-r--r-- video/out/gpu/shader_cache.c 3
-rw-r--r-- video/out/gpu/spirv.c 10
-rw-r--r-- video/out/gpu/spirv_shaderc.c 2
-rw-r--r-- video/out/gpu/utils.c 24
-rw-r--r-- video/out/gpu/video.c 51
-rw-r--r-- video/out/gpu/video.h 3
-rw-r--r-- video/out/gpu/video_shaders.c 30
10 files changed, 86 insertions, 61 deletions
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
index 36f9c2dad5..85f1aa7667 100644
--- a/video/out/gpu/context.c
+++ b/video/out/gpu/context.c
@@ -62,7 +62,7 @@ static const struct ra_ctx_fns *contexts[] = {
#endif
// OpenGL contexts:
-#if HAVE_ANDROID
+#if HAVE_EGL_ANDROID
&ra_ctx_android,
#endif
#if HAVE_RPI
@@ -80,6 +80,9 @@ static const struct ra_ctx_fns *contexts[] = {
#if HAVE_GL_DXINTEROP
&ra_ctx_dxgl,
#endif
+#if HAVE_GL_WAYLAND
+ &ra_ctx_wayland_egl,
+#endif
#if HAVE_GL_X11
&ra_ctx_glx_probe,
#endif
@@ -89,9 +92,6 @@ static const struct ra_ctx_fns *contexts[] = {
#if HAVE_GL_X11
&ra_ctx_glx,
#endif
-#if HAVE_GL_WAYLAND
- &ra_ctx_wayland_egl,
-#endif
#if HAVE_EGL_DRM
&ra_ctx_drm_egl,
#endif
diff --git a/video/out/gpu/libmpv_gpu.c b/video/out/gpu/libmpv_gpu.c
index fce2acfa4d..5ca4ebb7ca 100644
--- a/video/out/gpu/libmpv_gpu.c
+++ b/video/out/gpu/libmpv_gpu.c
@@ -36,9 +36,9 @@ static const struct native_resource_entry native_resource_map[] = {
.name = "drm_params",
.size = sizeof (mpv_opengl_drm_params),
},
- [MPV_RENDER_PARAM_DRM_OSD_SIZE] = {
- .name = "drm_osd_size",
- .size = sizeof (mpv_opengl_drm_osd_size),
+ [MPV_RENDER_PARAM_DRM_DRAW_SURFACE_SIZE] = {
+ .name = "drm_draw_surface_size",
+ .size = sizeof (mpv_opengl_drm_draw_surface_size),
},
};
@@ -207,6 +207,14 @@ static void screenshot(struct render_backend *ctx, struct vo_frame *frame,
gl_video_screenshot(p->renderer, frame, args);
}
+static void perfdata(struct render_backend *ctx,
+ struct voctrl_performance_data *out)
+{
+ struct priv *p = ctx->priv;
+
+ gl_video_perfdata(p->renderer, out);
+}
+
static void destroy(struct render_backend *ctx)
{
struct priv *p = ctx->priv;
@@ -235,5 +243,6 @@ const struct render_backend_fns render_backend_gpu = {
.render = render,
.get_image = get_image,
.screenshot = screenshot,
+ .perfdata = perfdata,
.destroy = destroy,
};
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index 79caacc919..748b485c95 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -188,6 +188,7 @@ enum ra_buf_type {
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
+ RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API
};
struct ra_buf_params {
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c
index f38f0a49fc..fa4560597f 100644
--- a/video/out/gpu/shader_cache.c
+++ b/video/out/gpu/shader_cache.c
@@ -666,8 +666,7 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
struct sc_uniform *u = &sc->uniforms[n];
if (u->type != SC_UNIFORM_TYPE_PUSHC)
continue;
- // push constants don't support explicit offsets
- ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type,
+ ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
u->input.name);
}
ADD(dst, "};\n");
diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c
index e20fbe7483..ee11d601a3 100644
--- a/video/out/gpu/spirv.c
+++ b/video/out/gpu/spirv.c
@@ -5,22 +5,17 @@
#include "config.h"
extern const struct spirv_compiler_fns spirv_shaderc;
-extern const struct spirv_compiler_fns spirv_nvidia_builtin;
// in probe-order
enum {
SPIRV_AUTO = 0,
SPIRV_SHADERC, // generally preferred, but not packaged everywhere
- SPIRV_NVIDIA, // can be useful for testing, only available on nvidia
};
static const struct spirv_compiler_fns *compilers[] = {
#if HAVE_SHADERC
[SPIRV_SHADERC] = &spirv_shaderc,
#endif
-#if HAVE_VULKAN
- [SPIRV_NVIDIA] = &spirv_nvidia_builtin,
-#endif
};
static const struct m_opt_choice_alternatives compiler_choices[] = {
@@ -28,9 +23,6 @@ static const struct m_opt_choice_alternatives compiler_choices[] = {
#if HAVE_SHADERC
{"shaderc", SPIRV_SHADERC},
#endif
-#if HAVE_VULKAN
- {"nvidia", SPIRV_NVIDIA},
-#endif
{0}
};
@@ -65,7 +57,7 @@ bool spirv_compiler_init(struct ra_ctx *ctx)
ctx->spirv->fns = compilers[i];
const char *name = m_opt_choice_str(compiler_choices, i);
- strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name));
+ strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name) - 1);
MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name);
if (ctx->spirv->fns->init(ctx))
return true;
diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c
index ee702053d5..f285631f14 100644
--- a/video/out/gpu/spirv_shaderc.c
+++ b/video/out/gpu/spirv_shaderc.c
@@ -32,7 +32,7 @@ static bool shaderc_init(struct ra_ctx *ctx)
goto error;
shaderc_compile_options_set_optimization_level(p->opts,
- shaderc_optimization_level_size);
+ shaderc_optimization_level_performance);
if (ctx->opts.debug)
shaderc_compile_options_set_generate_debug_info(p->opts);
diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c
index 078a31c895..9234545a71 100644
--- a/video/out/gpu/utils.c
+++ b/video/out/gpu/utils.c
@@ -141,16 +141,17 @@ struct ra_layout std140_layout(struct ra_renderpass_input *inp)
// the nearest multiple of vec4
// 4. Matrices are treated like arrays of vectors
// 5. Arrays/matrices are laid out with a stride equal to the alignment
- size_t size = el_size * inp->dim_v;
+ size_t stride = el_size * inp->dim_v;
+ size_t align = stride;
if (inp->dim_v == 3)
- size += el_size;
+ align += el_size;
if (inp->dim_m > 1)
- size = MP_ALIGN_UP(size, sizeof(float[4]));
+ stride = align = MP_ALIGN_UP(stride, sizeof(float[4]));
return (struct ra_layout) {
- .align = size,
- .stride = size,
- .size = size * inp->dim_m,
+ .align = align,
+ .stride = stride,
+ .size = stride * inp->dim_m,
};
}
@@ -160,14 +161,15 @@ struct ra_layout std430_layout(struct ra_renderpass_input *inp)
// std430 packing rules: like std140, except arrays/matrices are always
// "tightly" packed, even arrays/matrices of vec3s
- size_t size = el_size * inp->dim_v;
+ size_t stride = el_size * inp->dim_v;
+ size_t align = stride;
if (inp->dim_v == 3 && inp->dim_m == 1)
- size += el_size;
+ align += el_size;
return (struct ra_layout) {
- .align = size,
- .stride = size,
- .size = size * inp->dim_m,
+ .align = align,
+ .stride = stride,
+ .size = stride * inp->dim_m,
};
}
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 46d9026742..13e5b06918 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -373,8 +373,9 @@ const struct m_sub_options gl_video_conf = {
SCALER_OPTS("tscale", SCALER_TSCALE),
OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
- OPT_FLAG("linear-scaling", linear_scaling, 0),
OPT_FLAG("correct-downscaling", correct_downscaling, 0),
+ OPT_FLAG("linear-downscaling", linear_downscaling, 0),
+ OPT_FLAG("linear-upscaling", linear_upscaling, 0),
OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
@@ -423,6 +424,8 @@ const struct m_sub_options gl_video_conf = {
OPT_REPLACED("opengl-fbo-format", "fbo-format"),
OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"),
OPT_REPLACED("opengl-gamma", "gamma-factor"),
+ OPT_REMOVED("linear-scaling", "Split into --linear-upscaling and "
+ "--linear-downscaling"),
{0}
},
.size = sizeof(struct gl_video_opts),
@@ -1103,8 +1106,14 @@ static void cleanup_binds(struct gl_video *p)
// Sets the appropriate compute shader metadata for an implicit compute pass
// bw/bh: block size
-static void pass_is_compute(struct gl_video *p, int bw, int bh)
+static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
{
+ if (p->pass_compute.active && flexible) {
+ // Avoid overwriting existing block sizes when using a flexible pass
+ bw = p->pass_compute.block_w;
+ bh = p->pass_compute.block_h;
+ }
+
p->pass_compute = (struct compute_info){
.active = true,
.block_w = bw,
@@ -1248,7 +1257,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
// If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
// over fragment shaders wherever possible.
if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
- pass_is_compute(p, 16, 16);
+ pass_is_compute(p, 16, 16, true);
if (p->pass_compute.active) {
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
@@ -1744,7 +1753,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
if (shmem_req > p->ra->max_shmem)
goto fallback;
- pass_is_compute(p, bw, bh);
+ pass_is_compute(p, bw, bh, false);
pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
return;
@@ -2326,13 +2335,18 @@ static void pass_scale_main(struct gl_video *p)
// Pre-conversion, like linear light/sigmoidization
GLSLF("// scaler pre-conversion\n");
- bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
+ bool use_linear = false;
+ if (downscaling) {
+ use_linear = p->opts.linear_downscaling;
- // Linear light downscaling results in nasty artifacts for HDR curves due
- // to the potentially extreme brightness differences severely compounding
- // any ringing. So just scale in gamma light instead.
- if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling)
- use_linear = false;
+ // Linear light downscaling results in nasty artifacts for HDR curves
+ // due to the potentially extreme brightness differences severely
+ // compounding any ringing. So just scale in gamma light instead.
+ if (mp_trc_is_hdr(p->image_params.color.gamma))
+ use_linear = false;
+ } else if (upscaling) {
+ use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling;
+ }
if (use_linear) {
p->use_linear = true;
@@ -2485,7 +2499,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
if (detect_peak) {
pass_describe(p, "detect HDR peak");
- pass_is_compute(p, 8, 8); // 8x8 is good for performance
+ pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"uint counter;"
"uint frame_idx;"
@@ -3488,9 +3502,9 @@ static bool check_dumb_mode(struct gl_video *p)
return false;
// otherwise, use auto-detection
- if (o->target_prim || o->target_trc || o->linear_scaling ||
- o->correct_downscaling || o->sigmoid_upscaling || o->interpolation ||
- o->blend_subs || o->deband || o->unsharp)
+ if (o->target_prim || o->target_trc || o->correct_downscaling ||
+ o->linear_downscaling || o->linear_upscaling || o->sigmoid_upscaling ||
+ o->interpolation || o->blend_subs || o->deband || o->unsharp)
return false;
// check remaining scalers (tscale is already implicitly excluded above)
for (int i = 0; i < SCALER_COUNT; i++) {
@@ -3519,7 +3533,7 @@ static void check_gl_features(struct gl_video *p)
bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;
- const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf",
+ const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16",
"rgb10_a2", "rgba8", 0};
const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
@@ -3646,8 +3660,11 @@ static void check_gl_features(struct gl_video *p)
p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d;
// mix() is needed for some gamma functions
- if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) {
- p->opts.linear_scaling = false;
+ if (!have_mglsl && (p->opts.linear_downscaling ||
+ p->opts.linear_upscaling || p->opts.sigmoid_upscaling))
+ {
+ p->opts.linear_downscaling = false;
+ p->opts.linear_upscaling = false;
p->opts.sigmoid_upscaling = false;
MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
}
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
index 2184599582..ca8b6f65d4 100644
--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@@ -112,8 +112,9 @@ struct gl_video_opts {
float tone_mapping_param;
float tone_mapping_desat;
int gamut_warning;
- int linear_scaling;
int correct_downscaling;
+ int linear_downscaling;
+ int linear_upscaling;
int sigmoid_upscaling;
float sigmoid_center;
float sigmoid_slope;
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
index 19fb0ccde8..342fb39ded 100644
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@@ -655,18 +655,6 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
GLSLF("float sig_peak = %f;\n", src_peak);
GLSLF("float sig_avg = %f;\n", sdr_avg);
- // Desaturate the color using a coefficient dependent on the signal
- // Do this before peak detection in order to try and reclaim as much
- // dynamic range as possible.
- if (desat > 0) {
- float base = 0.18 * dst_peak;
- GLSL(float luma = dot(dst_luma, color.rgb);)
- GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base);
- GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat);
- GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);)
- GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig`
- }
-
if (detect_peak)
hdr_update_peak(sc);
@@ -683,6 +671,18 @@ static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
GLSL(sig *= slope;)
GLSL(sig_peak *= slope;)
+ // Desaturate the color using a coefficient dependent on the signal.
+ // Do this after peak detection in order to prevent over-desaturating
+ // overly bright sources
+ if (desat > 0) {
+ float base = 0.18 * dst_peak;
+ GLSL(float luma = dot(dst_luma, color.rgb);)
+ GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base);
+ GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat);
+ GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);)
+ GLSL(sig = mix(sig, luma * slope, coeff);) // also make sure to update `sig`
+ }
+
switch (algo) {
case TONE_MAPPING_CLIP:
GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
@@ -833,10 +833,14 @@ void pass_color_map(struct gl_shader_cache *sc,
// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post.
// Obtain random numbers by calling rand(h), followed by h = permute(h) to
// update the state. Assumes the texture was hooked.
+// permute() was modified from the original to avoid "large" numbers in
+// calculations, since low-end mobile GPUs choke on them (overflow).
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
{
GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; })
- GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); })
+ GLSLHF("float permute(float x) {\n");
+ GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );)
+ GLSLHF("}\n");
GLSLH(float rand(float x) { return fract(x * 1.0/41.0); })
// Initialize the PRNG by hashing the position + a random uniform