author    Philip Langdale <philipl@overt.org>         2022-03-12 11:21:29 -0800
committer Philip Langdale <github.philipl@overt.org>  2023-05-28 15:46:05 -0700
commit    61e685594dce7fd5bf79c3fae65d962a1ea5e924 (patch)
tree      06e5523a4016714d822e94c9c39fb9e0e9e2fb6e /video/out
parent    085f3e31a091daf7d6649508b162cfcde826702a (diff)
hwdec_vulkan: add Vulkan HW Interop
Vulkan Video Decoding has finally become a reality, as it's now showing up
in shipping drivers, and the ffmpeg support has been merged. With that in
mind, this change introduces HW interop support for ffmpeg Vulkan frames.

The implementation is functionally complete - it can display frames
produced by hardware decoding, and it can work with ffmpeg vulkan filters.
There are still various caveats due to gaps and bugs in drivers, so YMMV,
as always. Primary testing has been done on Intel, AMD, and nvidia hardware
on Linux, with basic Windows testing on nvidia.

Notable caveats:

* Due to driver bugs, video decoding on nvidia does not work right now,
  unless you use the Vulkan Beta driver. It can be worked around, but that
  requires ffmpeg changes that are not considered acceptable to merge.
* Even if those work-arounds are applied, Vulkan filters will not work on
  video that was decoded by Vulkan, due to additional bugs in the nvidia
  drivers. The filters do work correctly on content decoded some other way
  and then uploaded to Vulkan (e.g. decode with nvdec, upload with
  --vf=format=vulkan).
* Vulkan filters can only be used with drivers that support
  VK_EXT_descriptor_buffer, which does not include Intel ANV as yet. There
  is an MR outstanding for this.
* When dealing with 1080p content, there may be some visual distortion in
  the bottom lines of frames due to chroma scaling incorporating the extra
  hidden lines at the bottom of the frame (1080p content is actually stored
  as 1088 lines), depending on the hardware/driver combination and the
  scaling algorithm. This cannot be easily addressed, as the mechanical fix
  for it violates the Vulkan spec and probably requires a spec change to
  resolve properly.

All of these caveats will be fixed in either drivers or ffmpeg, and so will
not require mpv changes (unless something unexpected happens).

If you want to run on nvidia with the non-beta drivers, you can use this
ffmpeg tree with the work-around patches:

* https://github.com/philipl/FFmpeg/tree/vulkan-nvidia-workarounds
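For reference, once mpv is built with the Vulkan interop, the new path could plausibly be exercised with invocations along these lines (an illustrative sketch, not part of this change; exact behaviour depends on your mpv/ffmpeg build and driver support):

  # Decode with the Vulkan hwaccel and present through the Vulkan interop:
  mpv --gpu-api=vulkan --hwdec=vulkan input.mkv

  # Decode some other way (here nvdec) and upload to Vulkan, the
  # work-around path mentioned above:
  mpv --gpu-api=vulkan --hwdec=nvdec --vf=format=vulkan input.mkv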
Diffstat (limited to 'video/out')
-rw-r--r--  video/out/gpu/hwdec.c            4
-rw-r--r--  video/out/hwdec/hwdec_vulkan.c 332
2 files changed, 336 insertions, 0 deletions
diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c
index 8b1ab8c961..1e3edb0d8d 100644
--- a/video/out/gpu/hwdec.c
+++ b/video/out/gpu/hwdec.c
@@ -38,6 +38,7 @@ extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay;
extern const struct ra_hwdec_driver ra_hwdec_drmprime;
extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay;
extern const struct ra_hwdec_driver ra_hwdec_aimagereader;
+extern const struct ra_hwdec_driver ra_hwdec_vulkan;
const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_VAAPI_EGL || HAVE_VAAPI_LIBPLACEBO
@@ -79,6 +80,9 @@ const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_ANDROID_MEDIA_NDK
&ra_hwdec_aimagereader,
#endif
+#if HAVE_VULKAN_INTEROP
+ &ra_hwdec_vulkan,
+#endif
NULL
};
diff --git a/video/out/hwdec/hwdec_vulkan.c b/video/out/hwdec/hwdec_vulkan.c
new file mode 100644
index 0000000000..f85b9f4829
--- /dev/null
+++ b/video/out/hwdec/hwdec_vulkan.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2022 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/vulkan/context.h"
+#include "video/out/placebo/ra_pl.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vulkan.h>
+
+struct vulkan_hw_priv {
+ struct mp_hwdec_ctx hwctx;
+ pl_gpu gpu;
+};
+
+struct vulkan_mapper_priv {
+ struct mp_image layout;
+ AVVkFrame *vkf;
+ pl_tex tex[4];
+};
+
+static void lock_queue(struct AVHWDeviceContext *ctx,
+ uint32_t queue_family, uint32_t index)
+{
+ pl_vulkan vulkan = ctx->user_opaque;
+ vulkan->lock_queue(vulkan, queue_family, index);
+}
+
+static void unlock_queue(struct AVHWDeviceContext *ctx,
+ uint32_t queue_family, uint32_t index)
+{
+ pl_vulkan vulkan = ctx->user_opaque;
+ vulkan->unlock_queue(vulkan, queue_family, index);
+}
+
+static int vulkan_init(struct ra_hwdec *hw)
+{
+ AVBufferRef *hw_device_ctx = NULL;
+ int ret = 0;
+ struct vulkan_hw_priv *p = hw->priv;
+
+ struct mpvk_ctx *vk = ra_vk_ctx_get(hw->ra_ctx);
+ if (!vk) {
+ MP_ERR(hw, "This is not a libplacebo vulkan gpu api context.\n");
+ return 0;
+ }
+
+ p->gpu = ra_pl_get(hw->ra_ctx->ra);
+ if (!p->gpu) {
+ MP_ERR(hw, "Failed to obtain pl_gpu.\n");
+ return 0;
+ }
+
+ /*
+ * libplacebo initialises all queues, but we still need to discover which
+ * one is the decode queue.
+ */
+ uint32_t num_qf = 0;
+ VkQueueFamilyProperties *qf = NULL;
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, NULL);
+ if (!num_qf)
+ goto error;
+
+ qf = talloc_array(NULL, VkQueueFamilyProperties, num_qf);
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, qf);
+
+ int decode_index = -1, decode_count = 0;
+ for (int i = 0; i < num_qf; i++) {
+ /*
+ * Pick the first discovered decode queue that we find. Maybe a day will
+ * come when this needs to be smarter, but I'm sure a bunch of other
+ * things will have to change too.
+ */
+ if ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+ decode_index = i;
+ decode_count = qf[i].queueCount;
+ }
+ }
+
+ hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+ if (!hw_device_ctx)
+ goto error;
+
+ AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
+ AVVulkanDeviceContext *device_hwctx = device_ctx->hwctx;
+
+ device_ctx->user_opaque = (void *)vk->vulkan;
+ device_hwctx->lock_queue = lock_queue;
+ device_hwctx->unlock_queue = unlock_queue;
+ device_hwctx->get_proc_addr = vk->vkinst->get_proc_addr;
+ device_hwctx->inst = vk->vkinst->instance;
+ device_hwctx->phys_dev = vk->vulkan->phys_device;
+ device_hwctx->act_dev = vk->vulkan->device;
+ device_hwctx->device_features = *vk->vulkan->features;
+ device_hwctx->enabled_inst_extensions = vk->vkinst->extensions;
+ device_hwctx->nb_enabled_inst_extensions = vk->vkinst->num_extensions;
+ device_hwctx->enabled_dev_extensions = vk->vulkan->extensions;
+ device_hwctx->nb_enabled_dev_extensions = vk->vulkan->num_extensions;
+ device_hwctx->queue_family_index = vk->vulkan->queue_graphics.index;
+ device_hwctx->nb_graphics_queues = vk->vulkan->queue_graphics.count;
+ device_hwctx->queue_family_tx_index = vk->vulkan->queue_transfer.index;
+ device_hwctx->nb_tx_queues = vk->vulkan->queue_transfer.count;
+ device_hwctx->queue_family_comp_index = vk->vulkan->queue_compute.index;
+ device_hwctx->nb_comp_queues = vk->vulkan->queue_compute.count;
+ device_hwctx->queue_family_decode_index = decode_index;
+ device_hwctx->nb_decode_queues = decode_count;
+
+ ret = av_hwdevice_ctx_init(hw_device_ctx);
+ if (ret < 0) {
+ MP_ERR(hw, "av_hwdevice_ctx_init failed\n");
+ goto error;
+ }
+
+ p->hwctx = (struct mp_hwdec_ctx) {
+ .driver_name = hw->driver->name,
+ .av_device_ref = hw_device_ctx,
+ .hw_imgfmt = IMGFMT_VULKAN,
+ };
+ hwdec_devices_add(hw->devs, &p->hwctx);
+
+ talloc_free(qf);
+ return 0;
+
+ error:
+ talloc_free(qf);
+ av_buffer_unref(&hw_device_ctx);
+ return -1;
+}
+
+static void vulkan_uninit(struct ra_hwdec *hw)
+{
+ struct vulkan_hw_priv *p = hw->priv;
+
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ av_buffer_unref(&p->hwctx.av_device_ref);
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+ struct vulkan_mapper_priv *p = mapper->priv;
+
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+ mapper->dst_params.hw_subfmt = 0;
+
+ mp_image_set_params(&p->layout, &mapper->dst_params);
+
+ struct ra_imgfmt_desc desc = {0};
+ if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+ return -1;
+
+ return 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+ struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+ struct vulkan_mapper_priv *p = mapper->priv;
+ if (!mapper->src)
+ goto end;
+
+ AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+ const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = p->vkf;
+
+ int num_images;
+ for (num_images = 0; (vkf->img[num_images] != NULL); num_images++);
+
+ for (int i = 0; (p->tex[i] != NULL); i++) {
+ pl_tex *tex = &p->tex[i];
+ if (!*tex)
+ continue;
+
+ // If we have multiple planes and one image, then that is a multiplane
+ // frame. Anything else is treated as one-image-per-plane.
+ int index = p->layout.num_planes > 1 && num_images == 1 ? 0 : i;
+
+ // Update AVVkFrame state to reflect current layout
+ bool ok = pl_vulkan_hold_ex(p_owner->gpu, pl_vulkan_hold_params(
+ .tex = *tex,
+ .out_layout = &vkf->layout[index],
+ .qf = VK_QUEUE_FAMILY_IGNORED,
+ .semaphore = (pl_vulkan_sem) {
+ .sem = vkf->sem[index],
+ .value = vkf->sem_value[index] + 1,
+ },
+ ));
+
+ vkf->access[index] = 0;
+ vkf->sem_value[index] += !!ok;
+ *tex = NULL;
+ }
+
+ vkfc->unlock_frame(hwfc, vkf);
+
+ end:
+ for (int i = 0; i < p->layout.num_planes; i++)
+ ra_tex_free(mapper->ra, &mapper->tex[i]);
+
+ p->vkf = NULL;
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+ bool result = false;
+ struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+ struct vulkan_mapper_priv *p = mapper->priv;
+ pl_vulkan vk = pl_vulkan_get(p_owner->gpu);
+ if (!vk)
+ return -1;
+
+ AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+ const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *) mapper->src->planes[0];
+
+ /*
+ * We need to use the dimensions from the HW Frames Context for the
+ * textures, as the underlying images may be larger than the logical frame
+ * size. This most often happens with 1080p content where the actual frame
+ * height is 1088.
+ */
+ struct mp_image raw_layout;
+ mp_image_setfmt(&raw_layout, p->layout.params.imgfmt);
+ mp_image_set_size(&raw_layout, hwfc->width, hwfc->height);
+
+ int num_images;
+ for (num_images = 0; (vkf->img[num_images] != NULL); num_images++);
+ const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
+
+ vkfc->lock_frame(hwfc, vkf);
+
+ for (int i = 0; i < p->layout.num_planes; i++) {
+ pl_tex *tex = &p->tex[i];
+ VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+ int index = i;
+
+ // If we have multiple planes and one image, then that is a multiplane
+ // frame. Anything else is treated as one-image-per-plane.
+ if (p->layout.num_planes > 1 && num_images == 1) {
+ index = 0;
+
+ switch (i) {
+ case 0:
+ aspect = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR;
+ break;
+ case 1:
+ aspect = VK_IMAGE_ASPECT_PLANE_1_BIT_KHR;
+ break;
+ case 2:
+ aspect = VK_IMAGE_ASPECT_PLANE_2_BIT_KHR;
+ break;
+ default:
+ goto error;
+ }
+ }
+
+ *tex = pl_vulkan_wrap(p_owner->gpu, pl_vulkan_wrap_params(
+ .image = vkf->img[index],
+ .width = mp_image_plane_w(&raw_layout, i),
+ .height = mp_image_plane_h(&raw_layout, i),
+ .format = vk_fmt[i],
+ .usage = vkfc->usage,
+ .aspect = aspect,
+ ));
+ if (!*tex)
+ goto error;
+
+ pl_vulkan_release_ex(p_owner->gpu, pl_vulkan_release_params(
+ .tex = p->tex[i],
+ .layout = vkf->layout[index],
+ .qf = VK_QUEUE_FAMILY_IGNORED,
+ .semaphore = (pl_vulkan_sem) {
+ .sem = vkf->sem[index],
+ .value = vkf->sem_value[index],
+ },
+ ));
+
+ struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+ result = mppl_wrap_tex(mapper->ra, *tex, ratex);
+ if (!result) {
+ pl_tex_destroy(p_owner->gpu, tex);
+ talloc_free(ratex);
+ goto error;
+ }
+ mapper->tex[i] = ratex;
+ }
+
+ p->vkf = vkf;
+ return 0;
+
+ error:
+ vkfc->unlock_frame(hwfc, vkf);
+ mapper_unmap(mapper);
+ return -1;
+}
+
+const struct ra_hwdec_driver ra_hwdec_vulkan = {
+ .name = "vulkan",
+ .imgfmts = {IMGFMT_VULKAN, 0},
+ .priv_size = sizeof(struct vulkan_hw_priv),
+ .init = vulkan_init,
+ .uninit = vulkan_uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct vulkan_mapper_priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ .unmap = mapper_unmap,
+ },
+};