From 65979986a923a8f08019b257c3fe72cd5e8ecf68 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 14 Sep 2017 08:04:55 +0200 Subject: vo_opengl: refactor into vo_gpu This is done in several steps: 1. refactor MPGLContext -> struct ra_ctx 2. move GL-specific stuff in vo_opengl into opengl/context.c 3. generalize context creation to support other APIs, and add --gpu-api 4. rename all of the --opengl- options that are no longer opengl-specific 5. move all of the stuff from opengl/* that isn't GL-specific into gpu/ (note: opengl/gl_utils.h became opengl/utils.h) 6. rename vo_opengl to vo_gpu 7. to handle window screenshots, the short-term approach was to just add it to ra_swapchain_fns. Long term (and for vulkan) this has to be moved to ra itself (and vo_gpu altered to compensate), but this was a stop-gap measure to prevent this commit from getting too big 8. move ra->fns->flush to ra_gl_ctx instead 9. some other minor changes that I've probably already forgotten Note: This is one half of a major refactor, the other half of which is provided by rossy's following commit. This commit enables support for all linux platforms, while his version enables support for all non-linux platforms. Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the --opengl- options like --opengl-early-flush, --opengl-finish etc. Should be a strict superset of the old functionality. Disclaimer: Since I have no way of compiling mpv on all platforms, some of these ports were done blindly. Specifically, the blind ports included context_mali_fbdev.c and context_rpi.c. Since they're both based on egl_helpers, the port should have gone smoothly without any major changes required. But if somebody complains about a compile error on those platforms (assuming anybody actually uses them), you know where to complain. 
--- video/out/gpu/context.c | 186 ++ video/out/gpu/context.h | 95 + video/out/gpu/hwdec.c | 239 +++ video/out/gpu/hwdec.h | 130 ++ video/out/gpu/lcms.c | 531 +++++ video/out/gpu/lcms.h | 43 + video/out/gpu/osd.c | 367 ++++ video/out/gpu/osd.h | 25 + video/out/gpu/ra.c | 327 +++ video/out/gpu/ra.h | 488 +++++ video/out/gpu/shader_cache.c | 954 +++++++++ video/out/gpu/shader_cache.h | 56 + video/out/gpu/user_shaders.c | 452 ++++ video/out/gpu/user_shaders.h | 98 + video/out/gpu/utils.c | 372 ++++ video/out/gpu/utils.h | 120 ++ video/out/gpu/video.c | 3809 ++++++++++++++++++++++++++++++++ video/out/gpu/video.h | 194 ++ video/out/gpu/video_shaders.c | 872 ++++++++ video/out/gpu/video_shaders.h | 56 + video/out/opengl/common.h | 4 +- video/out/opengl/context.c | 446 ++-- video/out/opengl/context.h | 152 +- video/out/opengl/context_cocoa.c | 2 +- video/out/opengl/context_drm_egl.c | 194 +- video/out/opengl/context_glx.c | 376 ++++ video/out/opengl/context_mali_fbdev.c | 58 +- video/out/opengl/context_rpi.c | 84 +- video/out/opengl/context_vdpau.c | 202 +- video/out/opengl/context_wayland.c | 74 +- video/out/opengl/context_x11.c | 358 ---- video/out/opengl/context_x11egl.c | 84 +- video/out/opengl/egl_helpers.c | 114 +- video/out/opengl/egl_helpers.h | 19 +- video/out/opengl/formats.h | 1 - video/out/opengl/gl_utils.c | 291 --- video/out/opengl/gl_utils.h | 56 - video/out/opengl/hwdec.c | 239 --- video/out/opengl/hwdec.h | 130 -- video/out/opengl/hwdec_cuda.c | 3 +- video/out/opengl/hwdec_ios.m | 2 +- video/out/opengl/hwdec_osx.c | 2 +- video/out/opengl/hwdec_rpi.c | 2 +- video/out/opengl/hwdec_vaegl.c | 4 +- video/out/opengl/hwdec_vaglx.c | 5 +- video/out/opengl/hwdec_vdpau.c | 2 +- video/out/opengl/lcms.c | 531 ----- video/out/opengl/lcms.h | 43 - video/out/opengl/osd.c | 367 ---- video/out/opengl/osd.h | 25 - video/out/opengl/ra.c | 327 --- video/out/opengl/ra.h | 491 ----- video/out/opengl/ra_gl.c | 7 - video/out/opengl/ra_gl.h | 3 +- video/out/opengl/shader_cache.c | 
955 --------- video/out/opengl/shader_cache.h | 56 - video/out/opengl/user_shaders.c | 452 ---- video/out/opengl/user_shaders.h | 98 - video/out/opengl/utils.c | 524 ++--- video/out/opengl/utils.h | 151 +- video/out/opengl/video.c | 3813 --------------------------------- video/out/opengl/video.h | 195 -- video/out/opengl/video_shaders.c | 872 -------- video/out/opengl/video_shaders.h | 56 - video/out/vo.c | 6 +- video/out/vo_gpu.c | 385 ++++ video/out/vo_opengl.c | 470 ---- video/out/vo_opengl_cb.c | 53 +- video/out/vo_rpi.c | 2 +- 69 files changed, 11238 insertions(+), 10962 deletions(-) create mode 100644 video/out/gpu/context.c create mode 100644 video/out/gpu/context.h create mode 100644 video/out/gpu/hwdec.c create mode 100644 video/out/gpu/hwdec.h create mode 100644 video/out/gpu/lcms.c create mode 100644 video/out/gpu/lcms.h create mode 100644 video/out/gpu/osd.c create mode 100644 video/out/gpu/osd.h create mode 100644 video/out/gpu/ra.c create mode 100644 video/out/gpu/ra.h create mode 100644 video/out/gpu/shader_cache.c create mode 100644 video/out/gpu/shader_cache.h create mode 100644 video/out/gpu/user_shaders.c create mode 100644 video/out/gpu/user_shaders.h create mode 100644 video/out/gpu/utils.c create mode 100644 video/out/gpu/utils.h create mode 100644 video/out/gpu/video.c create mode 100644 video/out/gpu/video.h create mode 100644 video/out/gpu/video_shaders.c create mode 100644 video/out/gpu/video_shaders.h create mode 100644 video/out/opengl/context_glx.c delete mode 100644 video/out/opengl/context_x11.c delete mode 100644 video/out/opengl/gl_utils.c delete mode 100644 video/out/opengl/gl_utils.h delete mode 100644 video/out/opengl/hwdec.c delete mode 100644 video/out/opengl/hwdec.h delete mode 100644 video/out/opengl/lcms.c delete mode 100644 video/out/opengl/lcms.h delete mode 100644 video/out/opengl/osd.c delete mode 100644 video/out/opengl/osd.h delete mode 100644 video/out/opengl/ra.c delete mode 100644 video/out/opengl/ra.h delete mode 
100644 video/out/opengl/shader_cache.c delete mode 100644 video/out/opengl/shader_cache.h delete mode 100644 video/out/opengl/user_shaders.c delete mode 100644 video/out/opengl/user_shaders.h delete mode 100644 video/out/opengl/video.c delete mode 100644 video/out/opengl/video.h delete mode 100644 video/out/opengl/video_shaders.c delete mode 100644 video/out/opengl/video_shaders.h create mode 100644 video/out/vo_gpu.c delete mode 100644 video/out/vo_opengl.c (limited to 'video') diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c new file mode 100644 index 0000000000..dbabba8b3b --- /dev/null +++ b/video/out/gpu/context.c @@ -0,0 +1,186 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "common/common.h" +#include "common/msg.h" +#include "options/options.h" +#include "options/m_option.h" +#include "video/out/vo.h" + +#include "context.h" + +extern const struct ra_ctx_fns ra_ctx_glx; +extern const struct ra_ctx_fns ra_ctx_glx_probe; +extern const struct ra_ctx_fns ra_ctx_x11_egl; +extern const struct ra_ctx_fns ra_ctx_drm_egl; +extern const struct ra_ctx_fns ra_ctx_cocoa; +extern const struct ra_ctx_fns ra_ctx_wayland_egl; +extern const struct ra_ctx_fns ra_ctx_wgl; +extern const struct ra_ctx_fns ra_ctx_angle; +extern const struct ra_ctx_fns ra_ctx_dxinterop; +extern const struct ra_ctx_fns ra_ctx_rpi; +extern const struct ra_ctx_fns ra_ctx_mali; +extern const struct ra_ctx_fns ra_ctx_vdpauglx; + +static const struct ra_ctx_fns *contexts[] = { +// OpenGL contexts: +#if HAVE_RPI + &ra_ctx_rpi, +#endif +/* +#if HAVE_GL_COCOA + &ra_ctx_cocoa, +#endif +#if HAVE_EGL_ANGLE_WIN32 + &ra_ctx_angle, +#endif +#if HAVE_GL_WIN32 + &ra_ctx_wgl, +#endif +#if HAVE_GL_DXINTEROP + &ra_ctx_dxinterop, +#endif +*/ +#if HAVE_GL_X11 + &ra_ctx_glx_probe, +#endif +#if HAVE_EGL_X11 + &ra_ctx_x11_egl, +#endif +#if HAVE_GL_X11 + &ra_ctx_glx, +#endif +#if HAVE_GL_WAYLAND + &ra_ctx_wayland_egl, +#endif +#if HAVE_EGL_DRM + &ra_ctx_drm_egl, +#endif +#if HAVE_MALI_FBDEV + &ra_ctx_mali, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_ctx_vdpauglx, +#endif +}; + +static bool get_help(struct mp_log *log, struct bstr param) +{ + if (bstr_equals0(param, "help")) { + mp_info(log, "GPU contexts / APIs:\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) + mp_info(log, " %s (%s)\n", contexts[n]->name, contexts[n]->type); + return true; + } + + return false; +} + +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (get_help(log, param)) + return M_OPT_EXIT; + if 
(bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->type)) + return 1; + } + return M_OPT_INVALID; +} + +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (get_help(log, param)) + return M_OPT_EXIT; + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->name)) + return 1; + } + return M_OPT_INVALID; +} + +// Create a VO window and create a RA context on it. +// vo_flags: passed to the backend's create window function +struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts) +{ + bool api_auto = !context_type || strcmp(context_type, "auto") == 0; + bool ctx_auto = !context_name || strcmp(context_name, "auto") == 0; + + if (ctx_auto) { + MP_VERBOSE(vo, "Probing for best GPU context.\n"); + opts.probing = true; + } + + // Hack to silence backend (X11/Wayland/etc.) errors. Kill it once backends + // are separate from `struct vo` + bool old_probing = vo->probing; + vo->probing = opts.probing; + + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (!opts.probing && strcmp(contexts[i]->name, context_name) != 0) + continue; + if (!api_auto && strcmp(contexts[i]->type, context_type) != 0) + continue; + + struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct ra_ctx) { + .vo = vo, + .global = vo->global, + .log = mp_log_new(ctx, vo->log, contexts[i]->type), + .opts = opts, + .fns = contexts[i], + }; + + MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name); + if (contexts[i]->init(ctx)) { + vo->probing = old_probing; + return ctx; + } + + talloc_free(ctx); + } + + // If we've reached this point, then none of the contexts matched the name + // requested, or the backend creation failed for all of them. 
+ MP_ERR(vo, "Failed initializing any suitable GPU context!\n"); + vo->probing = old_probing; + return NULL; +} + +void ra_ctx_destroy(struct ra_ctx **ctx) +{ + if (*ctx) + (*ctx)->fns->uninit(*ctx); + talloc_free(*ctx); + *ctx = NULL; +} diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h new file mode 100644 index 0000000000..42de59b75f --- /dev/null +++ b/video/out/gpu/context.h @@ -0,0 +1,95 @@ +#pragma once + +#include "video/out/vo.h" + +#include "config.h" +#include "ra.h" + +struct ra_ctx_opts { + int allow_sw; // allow software renderers + int want_alpha; // create an alpha framebuffer if possible + int debug; // enable debugging layers/callbacks etc. + bool probing; // the backend was auto-probed + int swapchain_depth; // max number of images to render ahead +}; + +struct ra_ctx { + struct vo *vo; + struct ra *ra; + struct mpv_global *global; + struct mp_log *log; + + struct ra_ctx_opts opts; + const struct ra_ctx_fns *fns; + struct ra_swapchain *swapchain; + + void *priv; +}; + +// The functions that make up a ra_ctx. +struct ra_ctx_fns { + const char *type; // API type (for --gpu-api) + const char *name; // name (for --gpu-context) + + // Resize the window, or create a new window if there isn't one yet. + // Currently, there is an unfortunate interaction with ctx->vo, and + // display size etc. are determined by it. + bool (*reconfig)(struct ra_ctx *ctx); + + // This behaves exactly like vo_driver.control(). + int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg); + + // These behave exactly like vo_driver.wakeup/wait_events. They are + // optional. + void (*wakeup)(struct ra_ctx *ctx); + void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_us); + + // Initialize/destroy the 'struct ra' and possibly the underlying VO backend. + // Not normally called by the user of the ra_ctx. 
+ bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); +}; + +// Extra struct for the swapchain-related functions so they can be easily +// inherited from helpers. +struct ra_swapchain { + struct ra_ctx *ctx; + struct priv *priv; + const struct ra_swapchain_fns *fns; + + bool flip_v; // flip the rendered image vertically (set by the swapchain) +}; + +struct ra_swapchain_fns { + // Gets the current framebuffer depth in bits (0 if unknown). Optional. + int (*color_depth)(struct ra_swapchain *sw); + + // Retrieves a screenshot of the framebuffer. These are always the right + // side up, regardless of ra_swapchain->flip_v. Optional. + struct mp_image *(*screenshot)(struct ra_swapchain *sw); + + // Called when rendering starts. Returns NULL on failure. This must be + // followed by submit_frame, to submit the rendered frame. + struct ra_tex *(*start_frame)(struct ra_swapchain *sw); + + // Present the frame. Issued in lockstep with start_frame, with rendering + // commands in between. The `frame` is just there for timing data, for + // swapchains smart enough to do something with it. + bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame); + + // Performs a buffer swap. This blocks for as long as necessary to meet + // params.swapchain_depth, or until the next vblank (for vsynced contexts) + void (*swap_buffers)(struct ra_swapchain *sw); +}; + +// Create and destroy a ra_ctx. This also takes care of creating and destroying +// the underlying `struct ra`, and perhaps the underlying VO backend. 
+struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts); +void ra_ctx_destroy(struct ra_ctx **ctx); + +struct m_option; +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c new file mode 100644 index 0000000000..5fbc1aa4a9 --- /dev/null +++ b/video/out/gpu/hwdec.c @@ -0,0 +1,239 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include + +#include "config.h" + +#include "common/common.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "hwdec.h" + +extern const struct ra_hwdec_driver ra_hwdec_vaegl; +extern const struct ra_hwdec_driver ra_hwdec_vaglx; +extern const struct ra_hwdec_driver ra_hwdec_videotoolbox; +extern const struct ra_hwdec_driver ra_hwdec_vdpau; +extern const struct ra_hwdec_driver ra_hwdec_dxva2egl; +extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; +extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; +extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; +extern const struct ra_hwdec_driver ra_hwdec_dxva2; +extern const struct ra_hwdec_driver ra_hwdec_cuda; +extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; + +static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { +#if HAVE_VAAPI_EGL + &ra_hwdec_vaegl, +#endif +#if HAVE_VAAPI_GLX + &ra_hwdec_vaglx, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_hwdec_vdpau, +#endif +#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL + &ra_hwdec_videotoolbox, +#endif +#if HAVE_D3D_HWACCEL + &ra_hwdec_d3d11egl, + &ra_hwdec_d3d11eglrgb, + #if HAVE_D3D9_HWACCEL + &ra_hwdec_dxva2egl, + #endif +#endif +#if HAVE_GL_DXINTEROP_D3D9 + &ra_hwdec_dxva2gldx, +#endif +#if HAVE_CUDA_HWACCEL + &ra_hwdec_cuda, +#endif +#if HAVE_RPI + &ra_hwdec_rpi_overlay, +#endif + NULL +}; + +static struct ra_hwdec *load_hwdec_driver(struct mp_log *log, struct ra *ra, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto) +{ + struct ra_hwdec *hwdec = talloc(NULL, struct ra_hwdec); + *hwdec = (struct ra_hwdec) { + .driver = drv, + .log = mp_log_new(hwdec, log, drv->name), + .global = global, + .ra = ra, + .devs = devs, + .probing = is_auto, + .priv = talloc_zero_size(hwdec, drv->priv_size), + }; + mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name); + if (hwdec->driver->init(hwdec) < 0) { + ra_hwdec_uninit(hwdec); + mp_verbose(log, "Loading failed.\n"); + 
return NULL; + } + return hwdec; +} + +struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api) +{ + bool is_auto = HWDEC_IS_AUTO(api); + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + if ((is_auto || api == drv->api) && !drv->testing_only) { + struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, is_auto); + if (r) + return r; + } + } + return NULL; +} + +// Load by option name. +struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + const char *name) +{ + int g_hwdec_api; + mp_read_option_raw(g, "hwdec", &m_option_type_choice, &g_hwdec_api); + if (!name || !name[0]) + name = m_opt_choice_str(mp_hwdec_names, g_hwdec_api); + + int api_id = HWDEC_NONE; + for (int n = 0; mp_hwdec_names[n].name; n++) { + if (name && strcmp(mp_hwdec_names[n].name, name) == 0) + api_id = mp_hwdec_names[n].value; + } + + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + if (name && strcmp(drv->name, name) == 0) { + struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, false); + if (r) + return r; + } + } + + return ra_hwdec_load_api(log, ra, g, devs, api_id); +} + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + bool help = bstr_equals0(param, "help"); + if (help) + mp_info(log, "Available hwdecs:\n"); + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + const char *api_name = m_opt_choice_str(mp_hwdec_names, drv->api); + if (help) { + mp_info(log, " %s [%s]\n", drv->name, api_name); + } else if (bstr_equals0(param, drv->name) || + bstr_equals0(param, api_name)) + { + return 1; + } + } + if (help) { + mp_info(log, " auto (loads best)\n" + " (other --hwdec values)\n" + 
"Setting an empty string means use --hwdec.\n"); + return M_OPT_EXIT; + } + if (!param.len) + return 1; // "" is treated specially + for (int n = 0; mp_hwdec_names[n].name; n++) { + if (bstr_equals0(param, mp_hwdec_names[n].name)) + return 1; + } + mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param)); + return M_OPT_INVALID; +} + +void ra_hwdec_uninit(struct ra_hwdec *hwdec) +{ + if (hwdec) + hwdec->driver->uninit(hwdec); + talloc_free(hwdec); +} + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt) +{ + for (int n = 0; hwdec->driver->imgfmts[n]; n++) { + if (hwdec->driver->imgfmts[n] == imgfmt) + return true; + } + return false; +} + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + struct mp_image_params *params) +{ + assert(ra_hwdec_test_format(hwdec, params->imgfmt)); + + struct ra_hwdec_mapper *mapper = talloc_ptrtype(NULL, mapper); + *mapper = (struct ra_hwdec_mapper){ + .owner = hwdec, + .driver = hwdec->driver->mapper, + .log = hwdec->log, + .ra = hwdec->ra, + .priv = talloc_zero_size(mapper, hwdec->driver->mapper->priv_size), + .src_params = *params, + .dst_params = *params, + }; + if (mapper->driver->init(mapper) < 0) + ra_hwdec_mapper_free(&mapper); + return mapper; +} + +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper) +{ + struct ra_hwdec_mapper *p = *mapper; + if (p) { + ra_hwdec_mapper_unmap(p); + p->driver->uninit(p); + talloc_free(p); + } + *mapper = NULL; +} + +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + if (mapper->driver->unmap) + mapper->driver->unmap(mapper); + mp_image_unrefp(&mapper->src); +} + +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img) +{ + ra_hwdec_mapper_unmap(mapper); + mp_image_setrefp(&mapper->src, img); + if (mapper->driver->map(mapper) < 0) { + ra_hwdec_mapper_unmap(mapper); + return -1; + } + return 0; +} diff --git a/video/out/gpu/hwdec.h b/video/out/gpu/hwdec.h new file mode 100644 index 0000000000..20bbaae9eb --- 
/dev/null +++ b/video/out/gpu/hwdec.h @@ -0,0 +1,130 @@ +#ifndef MPGL_HWDEC_H_ +#define MPGL_HWDEC_H_ + +#include "video/mp_image.h" +#include "ra.h" +#include "video/hwdec.h" + +struct ra_hwdec { + const struct ra_hwdec_driver *driver; + struct mp_log *log; + struct mpv_global *global; + struct ra *ra; + struct mp_hwdec_devices *devs; + // GLSL extensions required to sample textures from this. + const char **glsl_extensions; + // For free use by hwdec driver + void *priv; + // For working around the vdpau vs. vaapi mess. + bool probing; + // Used in overlay mode only. + float overlay_colorkey[4]; +}; + +struct ra_hwdec_mapper { + const struct ra_hwdec_mapper_driver *driver; + struct mp_log *log; + struct ra *ra; + void *priv; + struct ra_hwdec *owner; + // Input frame parameters. (Set before init(), immutable.) + struct mp_image_params src_params; + // Output frame parameters (represents the format the textures return). Must + // be set by init(), immutable afterwards, + struct mp_image_params dst_params; + + // The currently mapped source image (or the image about to be mapped in + // ->map()). NULL if unmapped. The mapper can also clear this reference if + // the mapped textures contain a full copy. + struct mp_image *src; + + // The mapped textures and metadata about them. These fields change if a + // new frame is mapped (or unmapped), but otherwise remain constant. + // The common code won't mess with these, so you can e.g. set them in the + // .init() callback. + struct ra_tex *tex[4]; + bool vdpau_fields; +}; + +// This can be used to map frames of a specific hw format as GL textures. +struct ra_hwdec_mapper_driver { + // Used to create ra_hwdec_mapper.priv. + size_t priv_size; + + // Init the mapper implementation. At this point, the field src_params, + // fns, devs, priv are initialized. + int (*init)(struct ra_hwdec_mapper *mapper); + // Destroy the mapper. unmap is called before this. 
+ void (*uninit)(struct ra_hwdec_mapper *mapper); + + // Map mapper->src as texture, and set mapper->tex to textures using it. + // It is expected that the textures remain valid until the next unmap + // or uninit call. + // The function is allowed to unref mapper->src if it's not needed (i.e. + // this function creates a copy). + // The underlying format can change, so you might need to do some form + // of change detection. You also must reject unsupported formats with an + // error. + // On error, returns a negative value and remains unmapped. + int (*map)(struct ra_hwdec_mapper *mapper); + // Unmap the frame. Does nothing if already unmapped. Optional. + void (*unmap)(struct ra_hwdec_mapper *mapper); +}; + +struct ra_hwdec_driver { + // Name of the interop backend. This is used for informational purposes only. + const char *name; + // Used to create ra_hwdec.priv. + size_t priv_size; + // Used to explicitly request a specific API. + enum hwdec_type api; + // One of the hardware surface IMGFMT_ that must be passed to map_image later. + // Terminated with a 0 entry. (Extend the array size as needed.) + const int imgfmts[3]; + // Don't load this unless requested by name. + bool testing_only; + + // Create the hwdec device. It must add it to hw->devs, if applicable. + int (*init)(struct ra_hwdec *hw); + void (*uninit)(struct ra_hwdec *hw); + + // This will be used to create a ra_hwdec_mapper from ra_hwdec. + const struct ra_hwdec_mapper_driver *mapper; + + // The following function provides an alternative API. Each ra_hwdec_driver + // must provide either a mapper or overlay_frame (not both or none), and + // if overlay_frame is set, it operates in overlay mode. In this mode, + // OSD etc. is rendered via OpenGL, but the video is rendered as a separate + // layer below it. + // Non-overlay mode is strictly preferred, so try not to use overlay mode. + // Set the given frame as overlay, replacing the previous one. 
This can also + // just change the position of the overlay. + // hw_image==src==dst==NULL is passed to clear the overlay. + int (*overlay_frame)(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe); +}; + +struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api); + +struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + const char *name); + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +void ra_hwdec_uninit(struct ra_hwdec *hwdec); + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt); + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + struct mp_image_params *params); +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper); +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper); +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img); + +#endif diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c new file mode 100644 index 0000000000..8747ae6aa6 --- /dev/null +++ b/video/out/gpu/lcms.c @@ -0,0 +1,531 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include + +#include "mpv_talloc.h" + +#include "config.h" + +#include "stream/stream.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "options/m_option.h" +#include "options/path.h" +#include "video/csputils.h" +#include "lcms.h" + +#include "osdep/io.h" + +#if HAVE_LCMS2 + +#include +#include +#include + +struct gl_lcms { + void *icc_data; + size_t icc_size; + struct AVBufferRef *vid_profile; + char *current_profile; + bool using_memory_profile; + bool changed; + enum mp_csp_prim current_prim; + enum mp_csp_trc current_trc; + + struct mp_log *log; + struct mpv_global *global; + struct mp_icc_opts *opts; +}; + +static bool parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3) +{ + if (sscanf(arg, "%dx%dx%d", p1, p2, p3) != 3) + return false; + for (int n = 0; n < 3; n++) { + int s = ((int[]) { *p1, *p2, *p3 })[n]; + if (s < 2 || s > 512) + return false; + } + return true; +} + +static int validate_3dlut_size_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + int p1, p2, p3; + char s[20]; + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + return parse_3dlut_size(s, &p1, &p2, &p3); +} + +#define OPT_BASE_STRUCT struct mp_icc_opts +const struct m_sub_options mp_icc_conf = { + .opts = (const m_option_t[]) { + OPT_FLAG("use-embedded-icc-profile", use_embedded, 0), + OPT_STRING("icc-profile", profile, M_OPT_FILE), + OPT_FLAG("icc-profile-auto", profile_auto, 0), + OPT_STRING("icc-cache-dir", cache_dir, M_OPT_FILE), + OPT_INT("icc-intent", intent, 0), + OPT_INTRANGE("icc-contrast", contrast, 0, 0, 100000), + OPT_STRING_VALIDATE("icc-3dlut-size", size_str, 0, validate_3dlut_size_opt), + + OPT_REPLACED("3dlut-size", "icc-3dlut-size"), + OPT_REMOVED("icc-cache", "see icc-cache-dir"), + {0} + }, + .size = sizeof(struct mp_icc_opts), + .defaults = &(const struct mp_icc_opts) { + .size_str = "64x64x64", + .intent = INTENT_RELATIVE_COLORIMETRIC, + .use_embedded = true, + }, 
+}; + +static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, + const char *msg) +{ + struct gl_lcms *p = cmsGetContextUserData(ctx); + MP_ERR(p, "lcms2: %s\n", msg); +} + +static void load_profile(struct gl_lcms *p) +{ + talloc_free(p->icc_data); + p->icc_data = NULL; + p->icc_size = 0; + p->using_memory_profile = false; + talloc_free(p->current_profile); + p->current_profile = NULL; + + if (!p->opts->profile || !p->opts->profile[0]) + return; + + char *fname = mp_get_user_path(NULL, p->global, p->opts->profile); + MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); + struct bstr iccdata = stream_read_file(fname, p, p->global, + 100000000); // 100 MB + talloc_free(fname); + if (!iccdata.len) + return; + + talloc_free(p->icc_data); + + p->icc_data = iccdata.start; + p->icc_size = iccdata.len; + p->current_profile = talloc_strdup(p, p->opts->profile); +} + +static void gl_lcms_destructor(void *ptr) +{ + struct gl_lcms *p = ptr; + av_buffer_unref(&p->vid_profile); +} + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + struct gl_lcms *p = talloc_ptrtype(talloc_ctx, p); + talloc_set_destructor(p, gl_lcms_destructor); + *p = (struct gl_lcms) { + .global = global, + .log = log, + .opts = opts, + }; + gl_lcms_update_options(p); + return p; +} + +void gl_lcms_update_options(struct gl_lcms *p) +{ + if ((p->using_memory_profile && !p->opts->profile_auto) || + !bstr_equals(bstr0(p->opts->profile), bstr0(p->current_profile))) + { + load_profile(p); + } + + p->changed = true; // probably +} + +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +// Returns whether the internal profile was changed. 
+bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) +{ + if (!p->opts->profile_auto || (p->opts->profile && p->opts->profile[0])) { + talloc_free(profile.start); + return false; + } + + if (p->using_memory_profile && + p->icc_data && profile.start && + profile.len == p->icc_size && + memcmp(profile.start, p->icc_data, p->icc_size) == 0) + { + talloc_free(profile.start); + return false; + } + + p->changed = true; + p->using_memory_profile = true; + + talloc_free(p->icc_data); + + p->icc_data = talloc_steal(p, profile.start); + p->icc_size = profile.len; + + return true; +} + +// Guards against NULL and uses bstr_equals to short-circuit some special cases +static bool vid_profile_eq(struct AVBufferRef *a, struct AVBufferRef *b) +{ + if (!a || !b) + return a == b; + + return bstr_equals((struct bstr){ a->data, a->size }, + (struct bstr){ b->data, b->size }); +} + +// Return whether the profile or config has changed since the last time it was +// retrieved. If it has changed, gl_lcms_get_lut3d() should be called. +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile) +{ + if (p->changed || p->current_prim != prim || p->current_trc != trc) + return true; + + return !vid_profile_eq(p->vid_profile, vid_profile); +} + +// Whether a profile is set. (gl_lcms_get_lut3d() is expected to return a lut, +// but it could still fail due to runtime errors, such as invalid icc data.) 
+bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return p->icc_size > 0; +} + +static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, + cmsHPROFILE disp_profile, + enum mp_csp_prim prim, enum mp_csp_trc trc) +{ + if (p->opts->use_embedded && p->vid_profile) { + // Try using the embedded ICC profile + cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, p->vid_profile->data, + p->vid_profile->size); + if (prof) { + MP_VERBOSE(p, "Successfully opened embedded ICC profile\n"); + return prof; + } + + // Otherwise, warn the user and generate the profile as usual + MP_WARN(p, "Video contained an invalid ICC profile! Ignoring..\n"); + } + + // The input profile for the transformation is dependent on the video + // primaries and transfer characteristics + struct mp_csp_primaries csp = mp_get_csp_primaries(prim); + cmsCIExyY wp_xyY = {csp.white.x, csp.white.y, 1.0}; + cmsCIExyYTRIPLE prim_xyY = { + .Red = {csp.red.x, csp.red.y, 1.0}, + .Green = {csp.green.x, csp.green.y, 1.0}, + .Blue = {csp.blue.x, csp.blue.y, 1.0}, + }; + + cmsToneCurve *tonecurve[3] = {0}; + switch (trc) { + case MP_CSP_TRC_LINEAR: tonecurve[0] = cmsBuildGamma(cms, 1.0); break; + case MP_CSP_TRC_GAMMA18: tonecurve[0] = cmsBuildGamma(cms, 1.8); break; + case MP_CSP_TRC_GAMMA22: tonecurve[0] = cmsBuildGamma(cms, 2.2); break; + case MP_CSP_TRC_GAMMA28: tonecurve[0] = cmsBuildGamma(cms, 2.8); break; + + case MP_CSP_TRC_SRGB: + // Values copied from Little-CMS + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){2.40, 1/1.055, 0.055/1.055, 1/12.92, 0.04045}); + break; + + case MP_CSP_TRC_PRO_PHOTO: + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){1.8, 1.0, 0.0, 1/16.0, 0.03125}); + break; + + case MP_CSP_TRC_BT_1886: { + // To build an appropriate BT.1886 transformation we need access to + // the display's black point, so we LittleCMS' detection function. 
+ // Relative colorimetric is used since we want to approximate the + // BT.1886 to the target device's actual black point even in e.g. + // perceptual mode + const int intent = MP_INTENT_RELATIVE_COLORIMETRIC; + cmsCIEXYZ bp_XYZ; + if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0)) + return false; + + // Map this XYZ value back into the (linear) source space + cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); + cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + (cmsToneCurve*[3]){linear, linear, linear}); + cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); + cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, + xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, + intent, 0); + cmsFreeToneCurve(linear); + cmsCloseProfile(rev_profile); + cmsCloseProfile(xyz_profile); + if (!xyz2src) + return false; + + double src_black[3]; + cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); + cmsDeleteTransform(xyz2src); + + // Contrast limiting + if (p->opts->contrast > 0) { + for (int i = 0; i < 3; i++) + src_black[i] = MPMAX(src_black[i], 1.0 / p->opts->contrast); + } + + // Built-in contrast failsafe + double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); + if (contrast > 100000) { + MP_WARN(p, "ICC profile detected contrast very high (>100000)," + " falling back to contrast 1000 for sanity. 
Set the" + " icc-contrast option to silence this warning.\n"); + src_black[0] = src_black[1] = src_black[2] = 1.0 / 1000; + } + + // Build the parametric BT.1886 transfer curve, one per channel + for (int i = 0; i < 3; i++) { + const double gamma = 2.40; + double binv = pow(src_black[i], 1.0/gamma); + tonecurve[i] = cmsBuildParametricToneCurve(cms, 6, + (double[4]){gamma, 1.0 - binv, binv, 0.0}); + } + break; + } + + default: + abort(); + } + + if (!tonecurve[0]) + return false; + + if (!tonecurve[1]) tonecurve[1] = tonecurve[0]; + if (!tonecurve[2]) tonecurve[2] = tonecurve[0]; + + cmsHPROFILE *vid_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + tonecurve); + + if (tonecurve[2] != tonecurve[0]) cmsFreeToneCurve(tonecurve[2]); + if (tonecurve[1] != tonecurve[0]) cmsFreeToneCurve(tonecurve[1]); + cmsFreeToneCurve(tonecurve[0]); + + return vid_profile; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + int s_r, s_g, s_b; + bool result = false; + + p->changed = false; + p->current_prim = prim; + p->current_trc = trc; + + // We need to hold on to a reference to the video's ICC profile for as long + // as we still need to perform equality checking, so generate a new + // reference here + av_buffer_unref(&p->vid_profile); + if (vid_profile) { + MP_VERBOSE(p, "Got an embedded ICC profile.\n"); + p->vid_profile = av_buffer_ref(vid_profile); + if (!p->vid_profile) + abort(); + } + + if (!parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b)) + return false; + + if (!gl_lcms_has_profile(p)) + return false; + + void *tmp = talloc_new(NULL); + uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 4); + struct lut3d *lut = NULL; + cmsContext cms = NULL; + + char *cache_file = NULL; + if (p->opts->cache_dir && p->opts->cache_dir[0]) { + // Gamma is included in the header to help uniquely identify it, + // because we may change the parameter in 
the future or make it + // customizable, same for the primaries. + char *cache_info = talloc_asprintf(tmp, + "ver=1.4, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, " + "contrast=%d\n", + p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast); + + uint8_t hash[32]; + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + av_sha_update(sha, cache_info, strlen(cache_info)); + if (vid_profile) + av_sha_update(sha, vid_profile->data, vid_profile->size); + av_sha_update(sha, p->icc_data, p->icc_size); + av_sha_final(sha, hash); + av_free(sha); + + char *cache_dir = mp_get_user_path(tmp, p->global, p->opts->cache_dir); + cache_file = talloc_strdup(tmp, ""); + for (int i = 0; i < sizeof(hash); i++) + cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]); + cache_file = mp_path_join(tmp, cache_dir, cache_file); + + mp_mkdirp(cache_dir); + } + + // check cache + if (cache_file && stat(cache_file, &(struct stat){0}) == 0) { + MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file); + struct bstr cachedata = stream_read_file(cache_file, tmp, p->global, + 1000000000); // 1 GB + if (cachedata.len == talloc_get_size(output)) { + memcpy(output, cachedata.start, cachedata.len); + goto done; + } else { + MP_WARN(p, "3D LUT cache invalid!\n"); + } + } + + cms = cmsCreateContext(NULL, p); + if (!cms) + goto error_exit; + cmsSetLogErrorHandlerTHR(cms, lcms2_error_handler); + + cmsHPROFILE profile = + cmsOpenProfileFromMemTHR(cms, p->icc_data, p->icc_size); + if (!profile) + goto error_exit; + + cmsHPROFILE vid_hprofile = get_vid_profile(p, cms, profile, prim, trc); + if (!vid_hprofile) { + cmsCloseProfile(profile); + goto error_exit; + } + + cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_hprofile, TYPE_RGB_16, + profile, TYPE_RGBA_16, + p->opts->intent, + cmsFLAGS_HIGHRESPRECALC | + cmsFLAGS_BLACKPOINTCOMPENSATION); + cmsCloseProfile(profile); + cmsCloseProfile(vid_hprofile); + + if (!trafo) + goto error_exit; + + // 
transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel + uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); + for (int b = 0; b < s_b; b++) { + for (int g = 0; g < s_g; g++) { + for (int r = 0; r < s_r; r++) { + input[r * 3 + 0] = r * 65535 / (s_r - 1); + input[r * 3 + 1] = g * 65535 / (s_g - 1); + input[r * 3 + 2] = b * 65535 / (s_b - 1); + } + size_t base = (b * s_r * s_g + g * s_r) * 4; + cmsDoTransform(trafo, input, output + base, s_r); + } + } + + cmsDeleteTransform(trafo); + + if (cache_file) { + FILE *out = fopen(cache_file, "wb"); + if (out) { + fwrite(output, talloc_get_size(output), 1, out); + fclose(out); + } + } + +done: ; + + lut = talloc_ptrtype(NULL, lut); + *lut = (struct lut3d) { + .data = talloc_steal(lut, output), + .size = {s_r, s_g, s_b}, + }; + + *result_lut3d = lut; + result = true; + +error_exit: + + if (cms) + cmsDeleteContext(cms); + + if (!lut) + MP_FATAL(p, "Error loading ICC profile.\n"); + + talloc_free(tmp); + return result; +} + +#else /* HAVE_LCMS2 */ + +const struct m_sub_options mp_icc_conf = { + .opts = (const m_option_t[]) { {0} }, + .size = sizeof(struct mp_icc_opts), + .defaults = &(const struct mp_icc_opts) {0}, +}; + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + return (struct gl_lcms *) talloc_new(talloc_ctx); +} + +void gl_lcms_update_options(struct gl_lcms *p) { } +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) {return false;} + +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile) +{ + return false; +} + +bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return false; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + return false; +} + +#endif diff --git a/video/out/gpu/lcms.h b/video/out/gpu/lcms.h new file mode 100644 index 
0000000000..35bbd61fe0 --- /dev/null +++ b/video/out/gpu/lcms.h @@ -0,0 +1,43 @@ +#ifndef MP_GL_LCMS_H +#define MP_GL_LCMS_H + +#include +#include +#include "misc/bstr.h" +#include "video/csputils.h" +#include + +extern const struct m_sub_options mp_icc_conf; + +struct mp_icc_opts { + int use_embedded; + char *profile; + int profile_auto; + char *cache_dir; + char *size_str; + int intent; + int contrast; +}; + +struct lut3d { + uint16_t *data; + int size[3]; +}; + +struct mp_log; +struct mpv_global; +struct gl_lcms; + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts); +void gl_lcms_update_options(struct gl_lcms *p); +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile); +bool gl_lcms_has_profile(struct gl_lcms *p); +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile); +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile); + +#endif diff --git a/video/out/gpu/osd.c b/video/out/gpu/osd.c new file mode 100644 index 0000000000..f7c325d1db --- /dev/null +++ b/video/out/gpu/osd.c @@ -0,0 +1,367 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include +#include + +#include + +#include "common/common.h" +#include "common/msg.h" +#include "video/csputils.h" +#include "video/mp_image.h" +#include "osd.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); + +// glBlendFuncSeparate() arguments +static const int blend_factors[SUBBITMAP_COUNT][4] = { + [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, +}; + +struct vertex { + float position[2]; + float texcoord[2]; + uint8_t ass_color[4]; +}; + +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, + {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, + {0} +}; + +struct mpgl_osd_part { + enum sub_bitmap_format format; + int change_id; + struct ra_tex *texture; + int w, h; + int num_subparts; + int prev_num_subparts; + struct sub_bitmap *subparts; + int num_vertices; + struct vertex *vertices; +}; + +struct mpgl_osd { + struct mp_log *log; + struct osd_state *osd; + struct ra *ra; + struct mpgl_osd_part *parts[MAX_OSD_PARTS]; + const struct ra_format *fmt_table[SUBBITMAP_COUNT]; + bool formats[SUBBITMAP_COUNT]; + bool change_flag; // for reporting to API user only + // temporary + int stereo_mode; + struct mp_osd_res osd_res; + void *scratch; +}; + +struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, + struct osd_state *osd) +{ + struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct mpgl_osd) { + .log = log, + .osd = osd, + .ra = ra, + .change_flag = true, + .scratch = talloc_zero_size(ctx, 1), + }; + + ctx->fmt_table[SUBBITMAP_LIBASS] = ra_find_unorm_format(ra, 1, 1); + ctx->fmt_table[SUBBITMAP_RGBA] = ra_find_unorm_format(ra, 1, 4); + + for (int n = 0; n < 
MAX_OSD_PARTS; n++) + ctx->parts[n] = talloc_zero(ctx, struct mpgl_osd_part); + + for (int n = 0; n < SUBBITMAP_COUNT; n++) + ctx->formats[n] = !!ctx->fmt_table[n]; + + return ctx; +} + +void mpgl_osd_destroy(struct mpgl_osd *ctx) +{ + if (!ctx) + return; + + for (int n = 0; n < MAX_OSD_PARTS; n++) { + struct mpgl_osd_part *p = ctx->parts[n]; + ra_tex_free(ctx->ra, &p->texture); + } + talloc_free(ctx); +} + +static int next_pow2(int v) +{ + for (int x = 0; x < 30; x++) { + if ((1 << x) >= v) + return 1 << x; + } + return INT_MAX; +} + +static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, + struct sub_bitmaps *imgs) +{ + struct ra *ra = ctx->ra; + bool ok = false; + + assert(imgs->packed); + + int req_w = next_pow2(imgs->packed_w); + int req_h = next_pow2(imgs->packed_h); + + const struct ra_format *fmt = ctx->fmt_table[imgs->format]; + assert(fmt); + + if (!osd->texture || req_w > osd->w || req_h > osd->h || + osd->format != imgs->format) + { + ra_tex_free(ra, &osd->texture); + + osd->format = imgs->format; + osd->w = FFMAX(32, req_w); + osd->h = FFMAX(32, req_h); + + MP_VERBOSE(ctx, "Reallocating OSD texture to %dx%d.\n", osd->w, osd->h); + + if (osd->w > ra->max_texture_wh || osd->h > ra->max_texture_wh) { + MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " + "supported size %dx%d.\n", ra->max_texture_wh, + ra->max_texture_wh); + goto done; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = osd->w, + .h = osd->h, + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + .host_mutable = true, + }; + osd->texture = ra_tex_create(ra, ¶ms); + if (!osd->texture) + goto done; + } + + struct ra_tex_upload_params params = { + .tex = osd->texture, + .src = imgs->packed->planes[0], + .invalidate = true, + .rc = &(struct mp_rect){0, 0, imgs->packed_w, imgs->packed_h}, + .stride = imgs->packed->stride[0], + }; + + ok = ra->fns->tex_upload(ra, ¶ms); + +done: + return ok; +} + +static void gen_osd_cb(void 
*pctx, struct sub_bitmaps *imgs) +{ + struct mpgl_osd *ctx = pctx; + + if (imgs->num_parts == 0 || !ctx->formats[imgs->format]) + return; + + struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; + + bool ok = true; + if (imgs->change_id != osd->change_id) { + if (!upload_osd(ctx, osd, imgs)) + ok = false; + + osd->change_id = imgs->change_id; + ctx->change_flag = true; + } + osd->num_subparts = ok ? imgs->num_parts : 0; + + MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); + memcpy(osd->subparts, imgs->parts, + osd->num_subparts * sizeof(osd->subparts[0])); +} + +bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc) +{ + assert(index >= 0 && index < MAX_OSD_PARTS); + struct mpgl_osd_part *part = ctx->parts[index]; + + enum sub_bitmap_format fmt = part->format; + if (!fmt || !part->num_subparts) + return false; + + gl_sc_uniform_texture(sc, "osdtex", part->texture); + switch (fmt) { + case SUBBITMAP_RGBA: { + GLSL(color = texture(osdtex, texcoord).bgra;) + break; + } + case SUBBITMAP_LIBASS: { + GLSL(color = + vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);) + break; + } + default: + abort(); + } + + gl_sc_set_vertex_format(sc, vertex_vao, sizeof(struct vertex)); + + return true; +} + +static void write_quad(struct vertex *va, struct gl_transform t, + float x0, float y0, float x1, float y1, + float tx0, float ty0, float tx1, float ty1, + float tex_w, float tex_h, const uint8_t color[4]) +{ + gl_transform_vec(t, &x0, &y0); + gl_transform_vec(t, &x1, &y1); + +#define COLOR_INIT {color[0], color[1], color[2], color[3]} + va[0] = (struct vertex){ {x0, y0}, {tx0 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[1] = (struct vertex){ {x0, y1}, {tx0 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[2] = (struct vertex){ {x1, y0}, {tx1 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[3] = (struct vertex){ {x1, y1}, {tx1 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[4] = va[2]; + va[5] = va[1]; +#undef COLOR_INIT +} + +static void 
generate_verts(struct mpgl_osd_part *part, struct gl_transform t) +{ + int num_vertices = part->num_subparts * 6; + MP_TARRAY_GROW(part, part->vertices, part->num_vertices + num_vertices); + + for (int n = 0; n < part->num_subparts; n++) { + struct sub_bitmap *b = &part->subparts[n]; + struct vertex *va = &part->vertices[part->num_vertices]; + + // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it + // doesn't matter that we upload garbage for the other formats + uint32_t c = b->libass.color; + uint8_t color[4] = { c >> 24, (c >> 16) & 0xff, + (c >> 8) & 0xff, 255 - (c & 0xff) }; + + write_quad(&va[n * 6], t, + b->x, b->y, b->x + b->dw, b->y + b->dh, + b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, + part->w, part->h, color); + } + + part->num_vertices += num_vertices; +} + +// number of screen divisions per axis (x=0, y=1) for the current 3D mode +static void get_3d_side_by_side(int stereo_mode, int div[2]) +{ + div[0] = div[1] = 1; + switch (stereo_mode) { + case MP_STEREO3D_SBS2L: + case MP_STEREO3D_SBS2R: div[0] = 2; break; + case MP_STEREO3D_AB2R: + case MP_STEREO3D_AB2L: div[1] = 2; break; + } +} + +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct fbodst target) +{ + struct mpgl_osd_part *part = ctx->parts[index]; + + int div[2]; + get_3d_side_by_side(ctx->stereo_mode, div); + + part->num_vertices = 0; + + for (int x = 0; x < div[0]; x++) { + for (int y = 0; y < div[1]; y++) { + struct gl_transform t; + gl_transform_ortho_fbodst(&t, target); + + float a_x = ctx->osd_res.w * x; + float a_y = ctx->osd_res.h * y; + t.t[0] += a_x * t.m[0][0] + a_y * t.m[1][0]; + t.t[1] += a_x * t.m[0][1] + a_y * t.m[1][1]; + + generate_verts(part, t); + } + } + + const int *factors = &blend_factors[part->format][0]; + gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); + + gl_sc_dispatch_draw(sc, target.tex, part->vertices, part->num_vertices); +} + +static void set_res(struct mpgl_osd *ctx, struct 
mp_osd_res res, int stereo_mode) +{ + int div[2]; + get_3d_side_by_side(stereo_mode, div); + + res.w /= div[0]; + res.h /= div[1]; + ctx->osd_res = res; +} + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags) +{ + for (int n = 0; n < MAX_OSD_PARTS; n++) + ctx->parts[n]->num_subparts = 0; + + set_res(ctx, res, stereo_mode); + + osd_draw(ctx->osd, ctx->osd_res, pts, draw_flags, ctx->formats, gen_osd_cb, ctx); + ctx->stereo_mode = stereo_mode; + + // Parts going away does not necessarily result in gen_osd_cb() being called + // (not even with num_parts==0), so check this separately. + for (int n = 0; n < MAX_OSD_PARTS; n++) { + struct mpgl_osd_part *part = ctx->parts[n]; + if (part->num_subparts != part->prev_num_subparts) + ctx->change_flag = true; + part->prev_num_subparts = part->num_subparts; + } +} + +// See osd_resize() for remarks. This function is an optional optimization too. +void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) +{ + set_res(ctx, res, stereo_mode); + osd_resize(ctx->osd, ctx->osd_res); +} + +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts) +{ + ctx->change_flag = false; + mpgl_osd_generate(ctx, *res, pts, 0, 0); + return ctx->change_flag; +} diff --git a/video/out/gpu/osd.h b/video/out/gpu/osd.h new file mode 100644 index 0000000000..6c2b886de3 --- /dev/null +++ b/video/out/gpu/osd.h @@ -0,0 +1,25 @@ +#ifndef MPLAYER_GL_OSD_H +#define MPLAYER_GL_OSD_H + +#include +#include + +#include "utils.h" +#include "shader_cache.h" +#include "sub/osd.h" + +struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, + struct osd_state *osd); +void mpgl_osd_destroy(struct mpgl_osd *ctx); + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags); +void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode); +bool 
mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc); +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct fbodst target); +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts); + +#endif diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c new file mode 100644 index 0000000000..ef1de54d1a --- /dev/null +++ b/video/out/gpu/ra.c @@ -0,0 +1,327 @@ +#include "common/common.h" +#include "common/msg.h" +#include "video/img_format.h" + +#include "ra.h" + +struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) +{ + return ra->fns->tex_create(ra, params); +} + +void ra_tex_free(struct ra *ra, struct ra_tex **tex) +{ + if (*tex) + ra->fns->tex_destroy(ra, *tex); + *tex = NULL; +} + +struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params) +{ + return ra->fns->buf_create(ra, params); +} + +void ra_buf_free(struct ra *ra, struct ra_buf **buf) +{ + if (*buf) + ra->fns->buf_destroy(ra, *buf); + *buf = NULL; +} + +void ra_free(struct ra **ra) +{ + if (*ra) + (*ra)->fns->destroy(*ra); + talloc_free(*ra); + *ra = NULL; +} + +size_t ra_vartype_size(enum ra_vartype type) +{ + switch (type) { + case RA_VARTYPE_INT: return sizeof(int); + case RA_VARTYPE_FLOAT: return sizeof(float); + case RA_VARTYPE_BYTE_UNORM: return 1; + default: return 0; + } +} + +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) +{ + size_t el_size = ra_vartype_size(input->type); + if (!el_size) + return (struct ra_layout){0}; + + // host data is always tightly packed + return (struct ra_layout) { + .align = 1, + .stride = el_size * input->dim_v, + .size = el_size * input->dim_v * input->dim_m, + }; +} + +static struct ra_renderpass_input *dup_inputs(void *ta_parent, + const struct ra_renderpass_input *inputs, int num_inputs) +{ + struct ra_renderpass_input *res = + talloc_memdup(ta_parent, (void *)inputs, num_inputs * 
sizeof(inputs[0])); + for (int n = 0; n < num_inputs; n++) + res[n].name = talloc_strdup(res, res[n].name); + return res; +} + +// Return a newly allocated deep-copy of params. +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); + *res = *params; + res->inputs = dup_inputs(res, res->inputs, res->num_inputs); + res->vertex_attribs = + dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); + res->cached_program = bstrdup(res, res->cached_program); + res->vertex_shader = talloc_strdup(res, res->vertex_shader); + res->frag_shader = talloc_strdup(res, res->frag_shader); + res->compute_shader = talloc_strdup(res, res->compute_shader); + return res; +}; + + +// Return whether this is a tightly packed format with no external padding and +// with the same bit size/depth in all components, and the shader returns +// components in the same