summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2021-04-09 09:14:54 +0200
committerNiklas Haas <github-daiK1o@haasn.dev>2021-11-03 14:09:27 +0100
commit9d5d9b24240efe98cf99bbda2cb5280b025506d8 (patch)
treec76c6f2437804732d84d18bd91a41641fc5bdd77
parent872015813c6e181900faab5d1e53bb636bf8757f (diff)
downloadmpv-9d5d9b24240efe98cf99bbda2cb5280b025506d8.tar.bz2
mpv-9d5d9b24240efe98cf99bbda2cb5280b025506d8.tar.xz
vo_gpu_next: add new libplacebo-based renderer
As discussed in #8799, this will eventually replace vo_gpu. However, it is not yet complete. Currently missing: - OpenGL contexts - hardware decoding - blend-subtitles=video - VOCTRL_SCREENSHOT However, it's usable enough to cover most use cases, and as such is enough to start getting in some crucial testing.
-rw-r--r--DOCS/interface-changes.rst5
-rw-r--r--DOCS/man/options.rst81
-rw-r--r--DOCS/man/vo.rst12
-rw-r--r--video/out/placebo/utils.c118
-rw-r--r--video/out/placebo/utils.h10
-rw-r--r--video/out/vo.c4
-rw-r--r--video/out/vo_gpu_next.c1264
-rw-r--r--wscript6
-rw-r--r--wscript_build.py1
9 files changed, 1499 insertions, 2 deletions
diff --git a/DOCS/interface-changes.rst b/DOCS/interface-changes.rst
index 84418c5ce3..6aa6fd907d 100644
--- a/DOCS/interface-changes.rst
+++ b/DOCS/interface-changes.rst
@@ -26,6 +26,11 @@ Interface changes
::
+ --- mpv 0.35.0 ---
+ - add the `--vo=gpu-next` video output driver, as well as the options
+ `--allow-delayed-peak-detect`, `--builtin-scalers`,
+ `--interpolation-preserve`, `--lut`, `--lut-type`, `--image-lut`,
+ `--image-lut-type` and `--target-lut` along with it.
--- mpv 0.34.0 ---
- deprecate selecting by card number with `--drm-connector`, add
`--drm-device` which can be used instead
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 3ade7f764c..4b97915734 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -5013,8 +5013,9 @@ ALSA audio output options
GPU renderer options
-----------------------
-The following video options are currently all specific to ``--vo=gpu`` and
-``--vo=libmpv`` only, which are the only VOs that implement them.
+The following video options are currently all specific to ``--vo=gpu``,
+``--vo=libmpv`` and ``--vo=gpu-next``, which are the only VOs that implement
+them.
``--scale=<filter>``
The filter function to use when upscaling video.
@@ -5191,6 +5192,12 @@ The following video options are currently all specific to ``--vo=gpu`` and
will reproduce the source image perfectly if no scaling is performed.
Enabled by default. Note that this option never affects ``--cscale``.
+``--builtin-scalers``
+ Allow using faster built-in replacements for common scalers such as
+ ``nearest``, ``bilinear`` or ``bicubic``. These have the disadvantage of
+ not being configurable, unlike normal scaler kernels. Defaults to
+ enabled. (This option only affects ``--vo=gpu-next``)
+
``--correct-downscaling``
When using convolution based filters, extend the filter size when
downscaling. Increases quality, but reduces performance while downscaling.
@@ -5267,6 +5274,15 @@ The following video options are currently all specific to ``--vo=gpu`` and
Set this to ``-1`` to disable this logic.
+``--interpolation-preserve``
+ Preserve the previous frames' interpolated results even when renderer
+ parameters are changed - with the exception of options related to
+ cropping and video placement, which always invalidate the cache. Enabling
+ this option makes dynamic updates of renderer settings slightly smoother at
+ the cost of slightly higher latency in response to such changes. Defaults
+ to on. (Only affects ``--vo=gpu-next``, note that ``--vo=gpu`` always
+ invalidates interpolated frames)
+
``--opengl-pbo``
Enable use of PBOs. On some drivers this can be faster, especially if the
source video size is huge (e.g. so called "4K" video). On other drivers it
@@ -6139,6 +6155,29 @@ The following video options are currently all specific to ``--vo=gpu`` and
NOTE: Only implemented on macOS.
+``--image-lut=<file>``
+ Specifies a custom LUT file (in Adobe .cube format) to apply to the colors
+ during image decoding. The exact interpretation of the LUT depends on
+ the value of ``--image-lut-type``. (Only for ``--vo=gpu-next``)
+
+``--image-lut-type=<value>``
+ Controls the interpretation of color values fed to and from the LUT
+ specified as ``--image-lut``. Valid values are:
+
+ auto
+ Chooses the interpretation of the LUT automatically from tagged
+ metadata, and otherwise falls back to ``native``. (Default)
+ native
+ Applied to the raw image contents in its native colorspace, before
+ decoding to RGB. For example, for a HDR10 image, this would be fed
+ PQ-encoded YCbCr values in the range 0.0 - 1.0.
+ normalized
+ Applied to the normalized RGB image contents, after decoding from
+ its native color encoding, but before linearization.
+ conversion
+ Fully replaces the color decoding. A LUT of this type should ingest the
+ image's native colorspace and output normalized non-linear RGB.
+
``--target-prim=<value>``
Specifies the primaries of the display. Video colors will be adapted to
this colorspace when ICC color management is not being used. Valid values
@@ -6254,6 +6293,12 @@ The following video options are currently all specific to ``--vo=gpu`` and
In such a configuration, we highly recommend setting ``--tone-mapping``
to ``mobius`` or even ``clip``.
+``--target-lut=<file>``
+ Specifies a custom LUT file (in Adobe .cube format) to apply to the colors
+ before display on-screen. This LUT is fed values in normalized RGB, after
+ encoding into the target colorspace, so after the application of
+ ``--target-trc``. (Only for ``--vo=gpu-next``)
+
``--tone-mapping=<value>``
Specifies the algorithm used for tone-mapping images onto the target
display. This is relevant for both HDR->SDR conversion as well as gamut
@@ -6335,6 +6380,14 @@ The following video options are currently all specific to ``--vo=gpu`` and
The special value ``auto`` (default) will enable HDR peak computation
automatically if compute shaders and SSBOs are supported.
+``--allow-delayed-peak-detect``
+ When using ``--hdr-compute-peak``, allow delaying the detected peak by a
+ frame when beneficial for performance. In particular, this is required to
+ avoid an unnecessary FBO indirection when no advanced rendering is required
+ otherwise. Has no effect if there already is an indirect pass, such as when
+ advanced scaling is enabled. Defaults to on. (Only affects
+ ``--vo=gpu-next``, note that ``--vo=gpu`` always delays the peak.)
+
``--hdr-peak-decay-rate=<1.0..1000.0>``
The decay rate used for the HDR peak detection algorithm (default: 100.0).
This is only relevant when ``--hdr-compute-peak`` is enabled. Higher values
@@ -6456,6 +6509,30 @@ The following video options are currently all specific to ``--vo=gpu`` and
value ``inf`` causes the BT.1886 curve to be treated as a pure power gamma
2.4 function.
+``--lut=<file>``
+ Specifies a custom LUT (in Adobe .cube format) to apply to the colors
+ as part of color conversion. The exact interpretation depends on the value
+ of ``--lut-type``. (Only for ``--vo=gpu-next``)
+
+``--lut-type=<value>``
+ Controls the interpretation of color values fed to and from the LUT
+ specified as ``--lut``. Valid values are:
+
+ auto
+ Chooses the interpretation of the LUT automatically from tagged
+ metadata, and otherwise falls back to ``native``. (Default)
+ native
+ Applied to raw image contents in its native RGB colorspace (non-linear
+ light), before conversion to the output color space.
+ normalized
+ Applied to the normalized RGB image contents, in linear light, before
+ conversion to the output color space.
+ conversion
+ Fully replaces the conversion from the image color space to the output
+ color space. If such a LUT is present, it has the highest priority, and
+ overrides any ICC profiles, as well as options related to tone mapping
+ and output colorimetry (``--target-prim``, ``--target-trc`` etc.).
+
``--blend-subtitles=<yes|video|no>``
Blend subtitles directly onto upscaled video frames, before interpolation
and/or color management (default: no). Enabling this causes subtitles to be
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index 7632f3c406..7897336266 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -257,6 +257,18 @@ Available video output drivers are:
with ``rgb32f``. If you have problems, you can also try enabling the
``--gpu-dumb-mode=yes`` option.
+``gpu-next``
+ Experimental video renderer based on ``libplacebo``. This supports almost
+ the same set of features as ``--vo=gpu``. See `GPU renderer options`_ for a
+ list.
+
+ Currently, this only supports ``--gpu-api=vulkan``, and no hardware
+ decoding. Unlike ``--vo=gpu``, the FBO formats are not tunable, but you can
+ still set ``--gpu-dumb-mode=yes`` to forcibly disable their use.
+
+ Should generally be faster and higher quality, but some features may still
+ be missing or misbehave. Expect (and report!) bugs.
+
``sdl``
SDL 2.0+ Render video output driver, depending on system with or without
hardware acceleration. Should work on all platforms supported by SDL 2.0.
diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c
index 616914c27b..a4bd829880 100644
--- a/video/out/placebo/utils.c
+++ b/video/out/placebo/utils.c
@@ -58,3 +58,121 @@ void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing)
.log_priv = log,
});
}
+
+enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim) // Map an mpv color primaries value to the libplacebo enum value
+{
+ switch (prim) {
+ case MP_CSP_PRIM_AUTO: return PL_COLOR_PRIM_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CSP_PRIM_BT_601_525: return PL_COLOR_PRIM_BT_601_525;
+ case MP_CSP_PRIM_BT_601_625: return PL_COLOR_PRIM_BT_601_625;
+ case MP_CSP_PRIM_BT_709: return PL_COLOR_PRIM_BT_709;
+ case MP_CSP_PRIM_BT_2020: return PL_COLOR_PRIM_BT_2020;
+ case MP_CSP_PRIM_BT_470M: return PL_COLOR_PRIM_BT_470M;
+ case MP_CSP_PRIM_APPLE: return PL_COLOR_PRIM_APPLE;
+ case MP_CSP_PRIM_ADOBE: return PL_COLOR_PRIM_ADOBE;
+ case MP_CSP_PRIM_PRO_PHOTO: return PL_COLOR_PRIM_PRO_PHOTO;
+ case MP_CSP_PRIM_CIE_1931: return PL_COLOR_PRIM_CIE_1931;
+ case MP_CSP_PRIM_DCI_P3: return PL_COLOR_PRIM_DCI_P3;
+ case MP_CSP_PRIM_DISPLAY_P3: return PL_COLOR_PRIM_DISPLAY_P3;
+ case MP_CSP_PRIM_V_GAMUT: return PL_COLOR_PRIM_V_GAMUT;
+ case MP_CSP_PRIM_S_GAMUT: return PL_COLOR_PRIM_S_GAMUT;
+ case MP_CSP_PRIM_COUNT: return PL_COLOR_PRIM_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `prim` matched none of the cases above
+}
+
+enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc) // Map an mpv transfer characteristic to the libplacebo enum value
+{
+ switch (trc) {
+ case MP_CSP_TRC_AUTO: return PL_COLOR_TRC_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CSP_TRC_BT_1886: return PL_COLOR_TRC_BT_1886;
+ case MP_CSP_TRC_SRGB: return PL_COLOR_TRC_SRGB;
+ case MP_CSP_TRC_LINEAR: return PL_COLOR_TRC_LINEAR;
+ case MP_CSP_TRC_GAMMA18: return PL_COLOR_TRC_GAMMA18;
+ case MP_CSP_TRC_GAMMA20: return PL_COLOR_TRC_UNKNOWN; // missing: no matching value is returned, mapped to unknown instead
+ case MP_CSP_TRC_GAMMA22: return PL_COLOR_TRC_GAMMA22;
+ case MP_CSP_TRC_GAMMA24: return PL_COLOR_TRC_UNKNOWN; // missing: no matching value is returned, mapped to unknown instead
+ case MP_CSP_TRC_GAMMA26: return PL_COLOR_TRC_UNKNOWN; // missing: no matching value is returned, mapped to unknown instead
+ case MP_CSP_TRC_GAMMA28: return PL_COLOR_TRC_GAMMA28;
+ case MP_CSP_TRC_PRO_PHOTO: return PL_COLOR_TRC_PRO_PHOTO;
+ case MP_CSP_TRC_PQ: return PL_COLOR_TRC_PQ;
+ case MP_CSP_TRC_HLG: return PL_COLOR_TRC_HLG;
+ case MP_CSP_TRC_V_LOG: return PL_COLOR_TRC_V_LOG;
+ case MP_CSP_TRC_S_LOG1: return PL_COLOR_TRC_S_LOG1;
+ case MP_CSP_TRC_S_LOG2: return PL_COLOR_TRC_S_LOG2;
+ case MP_CSP_TRC_COUNT: return PL_COLOR_TRC_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `trc` matched none of the cases above
+}
+
+enum pl_color_light mp_light_to_pl(enum mp_csp_light light) // Map an mpv light type to the libplacebo enum value
+{
+ switch (light) {
+ case MP_CSP_LIGHT_AUTO: return PL_COLOR_LIGHT_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CSP_LIGHT_DISPLAY: return PL_COLOR_LIGHT_DISPLAY;
+ case MP_CSP_LIGHT_SCENE_HLG: return PL_COLOR_LIGHT_SCENE_HLG;
+ case MP_CSP_LIGHT_SCENE_709_1886: return PL_COLOR_LIGHT_SCENE_709_1886;
+ case MP_CSP_LIGHT_SCENE_1_2: return PL_COLOR_LIGHT_SCENE_1_2;
+ case MP_CSP_LIGHT_COUNT: return PL_COLOR_LIGHT_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `light` matched none of the cases above
+}
+
+enum pl_color_system mp_csp_to_pl(enum mp_csp csp) // Map an mpv colorspace (color system) value to the libplacebo enum value
+{
+ switch (csp) {
+ case MP_CSP_AUTO: return PL_COLOR_SYSTEM_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CSP_BT_601: return PL_COLOR_SYSTEM_BT_601;
+ case MP_CSP_BT_709: return PL_COLOR_SYSTEM_BT_709;
+ case MP_CSP_SMPTE_240M: return PL_COLOR_SYSTEM_SMPTE_240M;
+ case MP_CSP_BT_2020_NC: return PL_COLOR_SYSTEM_BT_2020_NC;
+ case MP_CSP_BT_2020_C: return PL_COLOR_SYSTEM_BT_2020_C;
+ case MP_CSP_RGB: return PL_COLOR_SYSTEM_RGB;
+ case MP_CSP_XYZ: return PL_COLOR_SYSTEM_XYZ;
+ case MP_CSP_YCGCO: return PL_COLOR_SYSTEM_YCGCO;
+ case MP_CSP_COUNT: return PL_COLOR_SYSTEM_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `csp` matched none of the cases above
+}
+
+enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels) // Map an mpv color levels value to the libplacebo enum value
+{
+ switch (levels) {
+ case MP_CSP_LEVELS_AUTO: return PL_COLOR_LEVELS_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CSP_LEVELS_TV: return PL_COLOR_LEVELS_TV;
+ case MP_CSP_LEVELS_PC: return PL_COLOR_LEVELS_PC;
+ case MP_CSP_LEVELS_COUNT: return PL_COLOR_LEVELS_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `levels` matched none of the cases above
+}
+
+enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha) // Map an mpv alpha type to the libplacebo alpha mode
+{
+ switch (alpha) {
+ // Note: Older versions of libplacebo incorrectly handled PL_ALPHA_UNKNOWN
+ // as premultiplied, so explicitly default this to independent instead.
+ case MP_ALPHA_AUTO: return PL_ALPHA_INDEPENDENT;
+ case MP_ALPHA_STRAIGHT: return PL_ALPHA_INDEPENDENT;
+ case MP_ALPHA_PREMUL: return PL_ALPHA_PREMULTIPLIED;
+ }
+
+ MP_UNREACHABLE(); // only reached if `alpha` matched none of the cases above
+}
+
+enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma) // Map an mpv chroma location to the libplacebo enum value
+{
+ switch (chroma) {
+ case MP_CHROMA_AUTO: return PL_CHROMA_UNKNOWN; // "auto" is passed on as "unknown"
+ case MP_CHROMA_TOPLEFT: return PL_CHROMA_TOP_LEFT;
+ case MP_CHROMA_LEFT: return PL_CHROMA_LEFT;
+ case MP_CHROMA_CENTER: return PL_CHROMA_CENTER;
+ case MP_CHROMA_COUNT: return PL_CHROMA_COUNT;
+ }
+
+ MP_UNREACHABLE(); // only reached if `chroma` matched none of the cases above
+}
diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h
index e6b43fcac3..a28a3a6793 100644
--- a/video/out/placebo/utils.h
+++ b/video/out/placebo/utils.h
@@ -2,9 +2,11 @@
#include "common/common.h"
#include "common/msg.h"
+#include "video/csputils.h"
#include <libplacebo/common.h>
#include <libplacebo/context.h>
+#include <libplacebo/colorspace.h>
void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing);
@@ -17,3 +19,11 @@ static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc)
.y1 = rc.y1,
};
}
+
+enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim);
+enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc);
+enum pl_color_light mp_light_to_pl(enum mp_csp_light light);
+enum pl_color_system mp_csp_to_pl(enum mp_csp csp);
+enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels);
+enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha);
+enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma);
diff --git a/video/out/vo.c b/video/out/vo.c
index 4cb15123ab..80fcaad4d0 100644
--- a/video/out/vo.c
+++ b/video/out/vo.c
@@ -52,6 +52,7 @@ extern const struct vo_driver video_out_x11;
extern const struct vo_driver video_out_vdpau;
extern const struct vo_driver video_out_xv;
extern const struct vo_driver video_out_gpu;
+extern const struct vo_driver video_out_gpu_next;
extern const struct vo_driver video_out_libmpv;
extern const struct vo_driver video_out_null;
extern const struct vo_driver video_out_image;
@@ -73,6 +74,9 @@ const struct vo_driver *const video_out_drivers[] =
&video_out_mediacodec_embed,
#endif
&video_out_gpu,
+#if HAVE_LIBPLACEBO_V4
+ &video_out_gpu_next,
+#endif
#if HAVE_VDPAU
&video_out_vdpau,
#endif
diff --git a/video/out/vo_gpu_next.c b/video/out/vo_gpu_next.c
new file mode 100644
index 0000000000..98eb1615d1
--- /dev/null
+++ b/video/out/vo_gpu_next.c
@@ -0,0 +1,1264 @@
+/*
+ * Copyright (C) 2021 Niklas Haas
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libplacebo/renderer.h>
+#include <libplacebo/shaders/lut.h>
+#include <libplacebo/utils/libav.h>
+#include <libplacebo/utils/frame_queue.h>
+
+#ifdef PL_HAVE_LCMS
+#include <libplacebo/shaders/icc.h>
+#endif
+
+#include "config.h"
+#include "common/common.h"
+#include "options/m_config.h"
+#include "options/path.h"
+#include "osdep/io.h"
+#include "stream/stream.h"
+#include "video/mp_image.h"
+#include "video/fmt-conversion.h"
+#include "placebo/utils.h"
+#include "gpu/context.h"
+#include "gpu/video.h"
+#include "gpu/video_shaders.h"
+#include "sub/osd.h"
+
+#if HAVE_VULKAN
+#include "vulkan/context.h"
+#endif
+
+struct osd_entry { // Render state for one OSD item: its texture and the overlay parts drawn from it
+ pl_tex tex; // texture the item's packed bitmap is uploaded to in write_overlays
+ struct pl_overlay_part *parts; // array grown with MP_TARRAY_APPEND in write_overlays
+ int num_parts; // number of valid elements in `parts`; reset to 0 before each refill
+};
+
+struct osd_state { // Storage for the overlays of one target frame (see write_overlays)
+ struct osd_entry entries[MAX_OSD_PARTS]; // indexed by the OSD item's render_index
+ struct pl_overlay overlays[MAX_OSD_PARTS]; // filled front to back; the frame's `overlays` pointer refers to this array
+};
+
+struct scaler_params { // NOTE(review): presumably backing storage for one scaler's filter setup; its use is not visible in this chunk
+ struct pl_filter_config config;
+ struct pl_filter_function kernel;
+ struct pl_filter_function window;
+};
+
+struct user_hook { // Element type of priv.user_hooks (described there as cached shaders)
+ char *path; // NOTE(review): presumably the shader file path used as cache key; confirm against the loader
+ const struct pl_hook *hook;
+};
+
+struct user_lut { // State for one user-supplied LUT; refreshed by update_lut (definition not in this chunk)
+ char *opt; // NOTE(review): presumably the option value naming the LUT file; confirm in update_lut
+ char *path; // NOTE(review): presumably the path of the currently loaded LUT; confirm in update_lut
+ int type; // copied into pl_frame.lut_type by map_frame
+ struct pl_custom_lut *lut; // copied into pl_frame.lut by map_frame
+};
+
+struct priv { // Private state of this VO, reached through vo->priv
+ struct mp_log *log;
+ struct mpv_global *global;
+ struct ra_ctx *ra_ctx;
+
+ pl_log pllog;
+ pl_gpu gpu; // GPU used for DR buffers (get_image) and OSD textures (write_overlays)
+ pl_renderer rr;
+ pl_queue queue;
+ pl_swapchain sw;
+ pl_fmt osd_fmt[SUBBITMAP_COUNT]; // OSD texture format, indexed by the sub-bitmap format
+ pl_tex *sub_tex; // pool of spare OSD textures: unmap_frame appends, write_overlays pops
+ int num_sub_tex; // number of textures currently in `sub_tex`
+
+ struct mp_rect src, dst;
+ struct mp_osd_res osd_res;
+ struct osd_state osd_state;
+
+ uint64_t last_id;
+ double last_pts;
+ bool is_interpolated;
+ bool want_reset;
+
+ struct m_config_cache *opts_cache;
+ struct mp_csp_equalizer_state *video_eq;
+ struct pl_render_params params;
+ struct pl_deband_params deband;
+ struct pl_sigmoid_params sigmoid;
+ struct pl_color_adjustment color_adjustment;
+ struct pl_peak_detect_params peak_detect;
+ struct pl_color_map_params color_map;
+ struct pl_dither_params dither;
+ struct scaler_params scalers[SCALER_COUNT];
+ const struct pl_hook **hooks; // storage for `params.hooks`
+
+#ifdef PL_HAVE_LCMS
+ struct pl_icc_params icc;
+ struct pl_icc_profile icc_profile;
+ char *icc_path;
+#endif
+
+ struct user_lut image_lut; // refreshed and attached to every source frame in map_frame
+ struct user_lut target_lut;
+ struct user_lut lut;
+
+ // Cached shaders, preserved across options updates
+ struct user_hook *user_hooks;
+ int num_user_hooks;
+
+ // Performance data of last frame
+ struct voctrl_performance_data perf;
+
+ int delayed_peak; // NOTE(review): presumably backs --allow-delayed-peak-detect; confirm in the option table
+ int builtin_scalers; // NOTE(review): presumably backs --builtin-scalers; confirm in the option table
+ int inter_preserve; // NOTE(review): presumably backs --interpolation-preserve; confirm in the option table
+};
+
+static void update_render_options(struct priv *p);
+static void update_lut(struct priv *p, struct user_lut *lut);
+
+// This struct is stored at the end of DR-allocated buffers, and serves to both
+// detect such buffers and hold the reference to the actual GPU buffer.
+struct dr_buf {
+ uint64_t sentinel[2]; // set to dr_magic by get_image; compared against it in get_dr_buf
+ pl_gpu gpu; // GPU passed to pl_buf_destroy in free_dr_buf
+ pl_buf buf; // the GPU buffer whose mapped memory contains this header
+};
+
+static const uint64_t dr_magic[2] = { 0xc6e9222474db53ae, 0x9d49b2de6c3b563e }; // marker value stored in dr_buf.sentinel
+static const size_t dr_align = offsetof(struct { char c; struct dr_buf dr; }, dr); // offset of a dr_buf placed after a single char, i.e. its alignment requirement
+static inline struct dr_buf *dr_header(void *ptr, size_t size) // Locate the dr_buf header near the end of the region [ptr, ptr + size)
+{
+ uintptr_t start = (uintptr_t) ptr + size - sizeof(struct dr_buf); // highest address at which a dr_buf still fits in the region
+ uintptr_t aligned = MP_ALIGN_DOWN(start, dr_align); // move down to a dr_align boundary
+ assert(aligned >= (uintptr_t) ptr); // the header must not start before the region
+ return (struct dr_buf *) aligned;
+}
+
+static pl_buf get_dr_buf(struct mp_image *mpi) // Return the GPU buffer behind a DR-allocated image, or NULL if none is detected
+{
+ if (!mpi->bufs[0] || mpi->bufs[0]->size < sizeof(struct dr_buf)) // no buffer, or too small to contain a header
+ return NULL;
+
+ struct dr_buf *dr = dr_header(mpi->bufs[0]->data, mpi->bufs[0]->size);
+ if (memcmp(dr->sentinel, dr_magic, sizeof(dr_magic)) == 0) // sentinel matches: treated as a buffer created by get_image
+ return dr->buf;
+
+ return NULL;
+}
+
+static void free_dr_buf(void *opaque, uint8_t *data) // Free callback given to mp_image_from_buffer in get_image; `opaque` is the dr_buf header, `data` is unused
+{
+ struct dr_buf *dr = opaque;
+ // Can't use `&dr->buf` because it gets freed during `pl_buf_destroy`
+ pl_buf_destroy(dr->gpu, &(pl_buf) { dr->buf }); // destroy through a temporary copy of the handle instead
+}
+
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align) // Allocate an mp_image inside a host-mapped GPU buffer; returns NULL if unsupported or on failure
+{
+ struct priv *p = vo->priv;
+ pl_gpu gpu = p->gpu;
+ if (!gpu->limits.thread_safe || !gpu->limits.max_mapped_size) // bail out unless the GPU reports both limits as nonzero
+ return NULL;
+
+ int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
+ if (size < 0)
+ return NULL;
+
+ pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) {
+ .size = size + stride_align + sizeof(struct dr_buf) + dr_align, // image data plus room for the trailing dr_buf header and its alignment
+ .memory_type = PL_BUF_MEM_HOST,
+ .host_mapped = true,
+ });
+
+ if (!buf)
+ return NULL;
+
+ // Store the DR header at the end of the allocation
+ struct dr_buf *dr = dr_header(buf->data, buf->params.size);
+ memcpy(dr->sentinel, dr_magic, sizeof(dr_magic)); // mark the buffer so get_dr_buf can recognize it
+ dr->gpu = gpu;
+ dr->buf = buf;
+
+ struct mp_image *mpi = mp_image_from_buffer(imgfmt, w, h, stride_align,
+ buf->data, buf->params.size,
+ dr, free_dr_buf); // free_dr_buf receives `dr` as its opaque argument
+ if (!mpi) {
+ pl_buf_destroy(gpu, &buf); // no image was created, so destroy the buffer here
+ return NULL;
+ }
+
+ return mpi;
+}
+
+static void write_overlays(struct vo *vo, struct mp_osd_res res, double pts,
+ int flags, struct osd_state *state,
+ struct pl_frame *frame) // Render OSD bitmaps via osd_render, upload them, and attach them to `frame` as overlays backed by `state`
+{
+ struct priv *p = vo->priv;
+ static const bool subfmt_all[SUBBITMAP_COUNT] = { // bitmap formats requested from osd_render
+ [SUBBITMAP_LIBASS] = true,
+ [SUBBITMAP_RGBA] = true,
+ };
+
+ struct sub_bitmap_list *subs = osd_render(vo->osd, res, pts, flags, subfmt_all);
+ frame->num_overlays = 0;
+ frame->overlays = state->overlays;
+
+ for (int n = 0; n < subs->num_items; n++) {
+ const struct sub_bitmaps *item = subs->items[n];
+ if (!item->num_parts || !item->packed) // skip items with no parts or no packed bitmap
+ continue;
+ struct osd_entry *entry = &state->entries[item->render_index];
+ pl_fmt tex_fmt = p->osd_fmt[item->format];
+ if (!entry->tex) // no texture yet: try to take a spare one from the p->sub_tex pool
+ MP_TARRAY_POP(p->sub_tex, p->num_sub_tex, &entry->tex);
+ bool ok = pl_tex_recreate(p->gpu, &entry->tex, &(struct pl_tex_params) {
+ .format = tex_fmt,
+ .w = MPMAX(item->packed_w, entry->tex ? entry->tex->params.w : 0), // never request less than the existing texture's size
+ .h = MPMAX(item->packed_h, entry->tex ? entry->tex->params.h : 0),
+ .host_writable = true,
+ .sampleable = true,
+ });
+ if (!ok) {
+ MP_ERR(vo, "Failed recreating OSD texture!\n");
+ break;
+ }
+ ok = pl_tex_upload(p->gpu, &(struct pl_tex_transfer_params) {
+ .tex = entry->tex,
+ .rc = { .x1 = item->packed_w, .y1 = item->packed_h, }, // upload only the packed_w x packed_h region
+ .stride_w = item->packed->stride[0] / tex_fmt->texel_size, // byte stride divided by texel size
+ .ptr = item->packed->planes[0],
+ });
+ if (!ok) {
+ MP_ERR(vo, "Failed uploading OSD texture!\n");
+ break;
+ }
+
+ entry->num_parts = 0; // refill the parts array from scratch, reusing its storage
+ for (int i = 0; i < item->num_parts; i++) {
+ const struct sub_bitmap *b = &item->parts[i];
+ uint32_t c = b->libass.color;
+ MP_TARRAY_APPEND(p, entry->parts, entry->num_parts, (struct pl_overlay_part) {
+ .src = { b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h },
+ .dst = { b->x, b->y, b->x + b->dw, b->y + b->dh },
+ .color = { // `c` unpacked from the top byte down; the low byte is inverted (1.0 - x) for the last component
+ (c >> 24) / 255.0,
+ ((c >> 16) & 0xFF) / 255.0,
+ ((c >> 8) & 0xFF) / 255.0,
+ 1.0 - (c & 0xFF) / 255.0,
+ }
+ });
+ }
+
+ struct pl_overlay *ol = &state->overlays[frame->num_overlays++];
+ *ol = (struct pl_overlay) {
+ .tex = entry->tex,
+ .parts = entry->parts,
+ .num_parts = entry->num_parts,
+ .color = frame->color, // overlay uses the same color description as the frame
+ };
+
+ switch (item->format) { // overlay mode and alpha mode depend on the bitmap format
+ case SUBBITMAP_RGBA:
+ ol->mode = PL_OVERLAY_NORMAL;
+ ol->repr.alpha = PL_ALPHA_PREMULTIPLIED;
+ break;
+ case SUBBITMAP_LIBASS:
+ ol->mode = PL_OVERLAY_MONOCHROME;
+ ol->repr.alpha = PL_ALPHA_INDEPENDENT;
+ break;
+ }
+ }
+
+ talloc_free(subs); // reached after normal completion and after either error `break` above
+}
+
+struct frame_priv { // Per-image data read from mp_image.priv in map_frame and unmap_frame
+ struct vo *vo; // owning VO; gives access to its struct priv
+ struct osd_state subs; // subtitle overlays rendered for this image by map_frame
+};
+
+static int plane_data_from_imgfmt(struct pl_plane_data out_data[4],
+ struct pl_bit_encoding *out_bits,
+ enum mp_imgfmt imgfmt) // Describe `imgfmt` as per-plane pl_plane_data (and a shared bit encoding if `out_bits` is set); returns the plane count, or 0 if unsupported
+{
+ struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+ if (!desc.num_planes || !(desc.flags & MP_IMGFLAG_HAS_COMPS))
+ return 0;
+
+ if (desc.flags & MP_IMGFLAG_HWACCEL)
+ return 0; // HW-accelerated frames need to be mapped differently
+
+ if (!(desc.flags & MP_IMGFLAG_NE))
+ return 0; // GPU endianness follows the host's
+
+ if (desc.flags & MP_IMGFLAG_PAL)
+ return 0; // Palette formats (currently) not supported in libplacebo
+
+ if ((desc.flags & MP_IMGFLAG_TYPE_FLOAT) && (desc.flags & MP_IMGFLAG_YUV))
+ return 0; // Floating-point YUV (currently) unsupported
+
+ bool any_padded = false;
+ for (int p = 0; p < desc.num_planes; p++) {
+ struct pl_plane_data *data = &out_data[p];
+ struct mp_imgfmt_comp_desc sorted[MP_NUM_COMPONENTS];
+ int num_comps = 0;
+ for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) {
+ if (desc.comps[c].plane != p) // component belongs to a different plane
+ continue;
+
+ data->component_map[num_comps] = c;
+ sorted[num_comps] = desc.comps[c];
+ num_comps++;
+
+ // Sort components by offset order, while keeping track of the
+ // semantic mapping in `data->component_map`
+ for (int i = num_comps - 1; i > 0; i--) { // move the newly added entry down to its sorted position
+ if (sorted[i].offset >= sorted[i - 1].offset)
+ break;
+ MPSWAP(struct mp_imgfmt_comp_desc, sorted[i], sorted[i - 1]);
+ MPSWAP(int, data->component_map[i], data->component_map[i - 1]);
+ }
+ }
+
+ uint64_t total_bits = 0; // bits accounted for so far in this plane's pixel
+
+ // Fill in the pl_plane_data fields for each component
+ memset(data->component_size, 0, sizeof(data->component_size));
+ for (int c = 0; c < num_comps; c++) {
+ data->component_size[c] = sorted[c].size;
+ data->component_pad[c] = sorted[c].offset - total_bits; // gap between the previous component's end and this one's offset
+ total_bits += data->component_pad[c] + data->component_size[c];
+ any_padded |= sorted[c].pad;
+
+ // Ignore bit encoding of alpha channel
+ if (!out_bits || data->component_map[c] == PL_CHANNEL_A)
+ continue;
+
+ struct pl_bit_encoding bits = {
+ .sample_depth = data->component_size[c],
+ .color_depth = sorted[c].size - abs(sorted[c].pad),
+ .bit_shift = MPMAX(sorted[c].pad, 0), // only a positive pad value becomes a shift
+ };
+
+ if (p == 0 && c == 0) { // first component of the first plane sets the reference encoding
+ *out_bits = bits;
+ } else {
+ if (!pl_bit_encoding_equal(out_bits, &bits)) {
+ // Bit encoding differs between components/planes,
+ // cannot handle this
+ *out_bits = (struct pl_bit_encoding) {0};
+ out_bits = NULL; // stop comparing; also makes padded formats fail the check after the loop
+ }
+ }
+ }
+
+ if (total_bits % 8)
+ return 0; // pixel size is not byte-aligned
+
+ data->pixel_stride = total_bits / 8;
+ data->type = (desc.flags & MP_IMGFLAG_TYPE_FLOAT)
+ ? PL_FMT_FLOAT
+ : PL_FMT_UNORM;
+ }
+
+ if (any_padded && !out_bits)
+ return 0; // can't handle padded components without `pl_bit_encoding`
+
+ return desc.num_planes;
+}
+
+static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src,
+ struct pl_frame *frame) // Upload the mp_image in src->frame_data into `tex` and fill *frame; returns false if a plane upload fails
+{
+ struct mp_image *mpi = src->frame_data;
+ struct frame_priv *fp = mpi->priv;
+ struct pl_plane_data data[4] = {0};
+ struct vo *vo = fp->vo;
+ struct priv *p = vo->priv;
+
+ // TODO: implement support for hwdec wrappers
+
+ *frame = (struct pl_frame) { // color metadata is translated with the mp_*_to_pl helpers
+ .num_planes = mpi->num_planes,
+ .color = {
+ .primaries = mp_prim_to_pl(mpi->params.color.primaries),
+ .transfer = mp_trc_to_pl(mpi->params.color.gamma),
+ .light = mp_light_to_pl(mpi->params.color.light),
+ .sig_peak = mpi->params.color.sig_peak,
+ },
+ .repr = {
+ .sys = mp_csp_to_pl(mpi->params.color.space),
+ .levels = mp_levels_to_pl(mpi->params.color.levels),
+ .alpha = mp_alpha_to_pl(mpi->params.alpha),
+ },
+ .profile = {
+ .data = mpi->icc_profile ? mpi->icc_profile->data : NULL,
+ .len = mpi->icc_profile ? mpi->icc_profile->size : 0,
+ },
+#if PL_API_VER >= 162
+ .rotation = mpi->params.rotate / 90, // rotate value divided by 90 (integer division)
+#endif
+ };
+
+ enum pl_chroma_location chroma = mp_chroma_to_pl(mpi->params.chroma_location);
+ int planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt); // 0 for unsupported formats, in which case the loop below is skipped
+ for (int n = 0; n < planes; n++) {
+ data[n].width = mp_image_plane_w(mpi, n);
+ data[n].height = mp_image_plane_h(mpi, n);
+ data[n].row_stride = mpi->stride[n];
+ data[n].pixels = mpi->planes[n];
+
+ pl_buf buf = get_dr_buf(mpi); // non-NULL if the image carries the DR sentinel written by get_image
+ if (buf) {
+ data[n].pixels = NULL; // upload from the GPU buffer instead of from a host pointer
+ data[n].buf = buf;
+ data[n].buf_offset = mpi->planes[n] - buf->data; // plane start relative to the buffer's mapped memory
+ } else if (gpu->limits.callbacks) {
+ data[n].callback = talloc_free; // NOTE(review): presumably invoked by libplacebo on `priv` once the upload no longer needs the pixels; confirm
+ data[n].priv = mp_image_new_ref(mpi); // extra image reference passed to that callback
+ }
+
+ struct pl_plane *plane = &frame->planes[n];
+ if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
+ MP_ERR(vo, "Failed uploading frame!\n");
+ talloc_free(data[n].priv); // drop the extra reference, if one was taken (priv is NULL otherwise)
+ return false;
+ }
+
+ if (mpi->fmt.xs[n] || mpi->fmt.ys[n]) // only planes with a nonzero xs/ys (presumably subsampled chroma planes)
+ pl_chroma_location_offset(chroma, &plane->shift_x, &plane->shift_y);
+ }
+
+ // Compute a unique signature for any attached ICC profile. Wasteful in
+ // theory if the ICC profile is the same for multiple frames, but in
+ // practice ICC profiles are overwhelmingly going to be attached to
+ // still images so it shouldn't matter.
+ pl_icc_profile_compute_signature(&frame->profile);
+
+ // Generate subtitles for this frame
+ struct mp_osd_res vidres = {
+ .w = mpi->w, .h = mpi->h,
+ // compensate for anamorphic sources (render subtitles as normal)
+ .display_par = (float) mpi->params.p_h / mpi->params.p_w,
+ };
+ write_overlays(vo, vidres, mpi->pts, OSD_DRAW_SUB_ONLY, &fp->subs, frame); // overlays are stored in this image's frame_priv
+
+ // Update LUT attached to this frame
+ update_lut(p, &p->image_lut);
+ frame->lut = p->image_lut.lut;
+ frame->lut_type = p->image_lut.type;
+ return true;
+}
+
+static void unmap_frame(pl_gpu gpu, struct pl_frame *frame,
+ const struct pl_source_frame *src) // Return the image's OSD textures to the p->sub_tex pool, then free the image
+{
+ struct mp_image *mpi = src->frame_data;
+ struct frame_priv *fp = mpi->priv;
+ struct priv *p = fp->vo->priv;
+ for (int i = 0; i < MP_ARRAY_SIZE(fp->subs.entries); i++) {
+ pl_tex tex = fp->subs.entries[i].tex;
+ if (tex) // keep the texture for reuse by write_overlays instead of destroying it
+ MP_TARRAY_APPEND(p, p->sub_tex, p->num_sub_tex, tex);
+ }
+ talloc_free(mpi);
+}
+
+static void discard_frame(const struct pl_source_frame *src) // Free the mp_image held in src->frame_data; nothing else is touched
+{
+ struct mp_image *mpi = src->frame_data;
+ talloc_free(mpi);
+}
+
+static void info_callback(void *priv, const struct pl_render_info *info)
+{
+ struct vo *vo = priv;
+ struct priv *p = vo->priv;
+
+ int index;
+ struct mp_frame_perf *frame;
+ switch (info->stage) {
+ case PL_RENDER_STAGE_FRAME:
+ if (info->index > VO_PASS_PERF_MAX)
+ return; // silently ignore clipped passes, whatever
+ frame = &p->perf.fresh;
+ index = info->index;
+ break;