6 files changed, 456 insertions, 193 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 4b0cbcc1c4..00692f0350 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -42,7 +42,8 @@ static bool is_software_gl(GL *gl)
            strcmp(renderer, "Software Rasterizer") == 0 ||
            strstr(renderer, "llvmpipe") ||
            strcmp(vendor, "Microsoft Corporation") == 0 ||
-           strcmp(renderer, "Mesa X11") == 0;
+           strcmp(renderer, "Mesa X11") == 0 ||
+           strcmp(renderer, "Apple Software Renderer") == 0;
 }
 
 static void GLAPIENTRY dummy_glBindFramebuffer(GLenum target, GLuint framebuffer)
diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c
index 1ba7f5dd05..6aa3d95e79 100644
--- a/video/out/opengl/context_drm_egl.c
+++ b/video/out/opengl/context_drm_egl.c
@@ -81,7 +81,7 @@ struct priv {
     struct vt_switcher vt_switcher;
 
     struct mpv_opengl_drm_params drm_params;
-    struct mpv_opengl_drm_osd_size osd_size;
+    struct mpv_opengl_drm_draw_surface_size draw_surface_size;
 };
 
 // Not general. Limited to only the formats being used in this module
@@ -195,11 +195,11 @@ static bool init_gbm(struct ra_ctx *ctx)
     }
 
     MP_VERBOSE(ctx->vo, "Initializing GBM surface (%d x %d)\n",
-        p->osd_size.width, p->osd_size.height);
+        p->draw_surface_size.width, p->draw_surface_size.height);
     p->gbm.surface = gbm_surface_create(
         p->gbm.device,
-        p->osd_size.width,
-        p->osd_size.height,
+        p->draw_surface_size.width,
+        p->draw_surface_size.height,
         p->gbm_format,
         GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING);
     if (!p->gbm.surface) {
@@ -252,53 +252,54 @@ static bool crtc_setup_atomic(struct ra_ctx *ctx)
     struct priv *p = ctx->priv;
     struct drm_atomic_context *atomic_ctx = p->kms->atomic_context;
 
+    if (!drm_atomic_save_old_state(atomic_ctx)) {
+        MP_WARN(ctx->vo, "Failed to save old DRM atomic state\n");
+    }
+
     drmModeAtomicReqPtr request = drmModeAtomicAlloc();
-    if (request) {
-        if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->kms->crtc_id) < 0) {
-            MP_ERR(ctx->vo, "Could not set CRTC_ID on connector\n");
-            return false;
-        }
+    if (!request) {
+        MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n");
+        return false;
+    }
 
-        uint32_t blob_id;
-        if (drmModeCreatePropertyBlob(p->kms->fd, &p->kms->mode, sizeof(drmModeModeInfo),
-                                      &blob_id) != 0) {
-            MP_ERR(ctx->vo, "Failed to create DRM mode blob\n");
-            return false;
-        }
-        if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", blob_id) < 0) {
-            MP_ERR(ctx->vo, "Could not set MODE_ID on crtc\n");
-            return false;
-        }
-        if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) {
-            MP_ERR(ctx->vo, "Could not set ACTIVE on crtc\n");
-            return false;
-        }
+    if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->kms->crtc_id) < 0) {
+        MP_ERR(ctx->vo, "Could not set CRTC_ID on connector\n");
+        goto err;  // the request is allocated by now and must be freed
+    }
 
-        drm_object_set_property(request, atomic_ctx->osd_plane, "FB_ID", p->fb->id);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_ID", p->kms->crtc_id);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_X",   0);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_Y",   0);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_W",   p->osd_size.width << 16);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_H",   p->osd_size.height << 16);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_X",  0);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_Y",  0);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_W",  p->kms->mode.hdisplay);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_H",  p->kms->mode.vdisplay);
-
-        int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
-        if (ret) {
-           MP_ERR(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret);
-           drmModeAtomicFree(request);
-           return false;
-        }
+    if (!drm_mode_ensure_blob(p->kms->fd, &p->kms->mode)) {
+        MP_ERR(ctx->vo, "Failed to create DRM mode blob\n");
+        goto err;
+    }
+    if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", p->kms->mode.blob_id) < 0) {
+        MP_ERR(ctx->vo, "Could not set MODE_ID on crtc\n");
+        goto err;
+    }
+    if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) {
+        MP_ERR(ctx->vo, "Could not set ACTIVE on crtc\n");
+        goto err;
+    }
 
-        drmModeAtomicFree(request);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "FB_ID", p->fb->id);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_ID", p->kms->crtc_id);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_X",   0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_Y",   0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_W",   p->draw_surface_size.width << 16);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_H",   p->draw_surface_size.height << 16);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_X",  0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_Y",  0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_W",  p->kms->mode.mode.hdisplay);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_H",  p->kms->mode.mode.vdisplay);
 
-        return ret == 0;
-    } else {
-        MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n");
-    }
+    int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
+    if (ret)
+        MP_ERR(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret);
 
+    drmModeAtomicFree(request);
+    return ret == 0;
+
+  err:
+    drmModeAtomicFree(request);
     return false;
 }
 
@@ -308,32 +309,22 @@ static bool crtc_release_atomic(struct ra_ctx *ctx)
 
     struct drm_atomic_context *atomic_ctx = p->kms->atomic_context;
     drmModeAtomicReqPtr request = drmModeAtomicAlloc();
-    if (request) {
-        drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->old_crtc->crtc_id);
-
-        uint32_t blob_id;
-        if (drmModeCreatePropertyBlob(p->kms->fd, &p->old_crtc->mode, sizeof(drmModeModeInfo),
-                                      &blob_id) != 0) {
-            MP_ERR(ctx->vo, "Failed to create DRM mode blob\n");
-            return false;
-        }
-        drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", blob_id);
-        drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1);
-        drm_object_set_property(request, atomic_ctx->osd_plane, "FB_ID", p->old_crtc->buffer_id);
+    if (!request) {
+        MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n");
+        return false;
+    }
 
-        int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
+    if (!drm_atomic_restore_old_state(request, atomic_ctx)) {
+        MP_WARN(ctx->vo, "Got error while restoring old state\n");
+    }
 
-        if (ret)
-           MP_WARN(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret);
+    int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
 
-        drmModeAtomicFree(request);
+    if (ret)
+        MP_WARN(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret);
 
-        return ret == 0;
-    } else {
-        MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n");
-    }
-
-    return false;
+    drmModeAtomicFree(request);
+    return ret == 0;
 }
 
 static bool crtc_setup(struct ra_ctx *ctx)
@@ -341,16 +332,16 @@ static bool crtc_setup(struct ra_ctx *ctx)
     struct priv *p = ctx->priv;
     if (p->active)
         return true;
-    p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id);
 
     if (p->kms->atomic_context) {
         int ret = crtc_setup_atomic(ctx);
         p->active = true;
         return ret;
     } else {
+        p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id);
         int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb->id,
                                  0, 0, &p->kms->connector->connector_id, 1,
-                                 &p->kms->mode);
+                                 &p->kms->mode.mode);
         p->active = true;
         return ret == 0;
     }
@@ -373,19 +364,21 @@ static void crtc_release(struct ra_ctx *ctx)
         }
     }
 
-    if (p->old_crtc) {
-        if (p->kms->atomic_context) {
+    if (p->kms->atomic_context) {
+        if (p->kms->atomic_context->old_state.saved) {
             if (!crtc_release_atomic(ctx))
                 MP_ERR(ctx->vo, "Failed to restore previous mode\n");
-        } else {
+        }
+    } else {
+        if (p->old_crtc) {
             drmModeSetCrtc(p->kms->fd,
                            p->old_crtc->crtc_id, p->old_crtc->buffer_id,
                            p->old_crtc->x, p->old_crtc->y,
                            &p->kms->connector->connector_id, 1,
                            &p->old_crtc->mode);
+            drmModeFreeCrtc(p->old_crtc);
+            p->old_crtc = NULL;
         }
-        drmModeFreeCrtc(p->old_crtc);
-        p->old_crtc = NULL;
     }
 }
 
@@ -453,9 +446,9 @@ static void drm_egl_swap_buffers(struct ra_ctx *ctx)
     update_framebuffer_from_bo(ctx, p->gbm.next_bo);
 
     if (atomic_ctx) {
-        drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "FB_ID", p->fb->id);
-        drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "CRTC_ID", atomic_ctx->crtc->id);
-        drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "ZPOS", 1);
+        drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "FB_ID", p->fb->id);
+        drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "CRTC_ID", atomic_ctx->crtc->id);
+        drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "ZPOS", 1);
 
         ret = drmModeAtomicCommit(p->kms->fd, atomic_ctx->request,
                                   DRM_MODE_ATOMIC_NONBLOCK | DRM_MODE_PAGE_FLIP_EVENT, NULL);
@@ -520,30 +513,32 @@ static void drm_egl_uninit(struct ra_ctx *ctx)
     p->egl.context = EGL_NO_CONTEXT;
     eglDestroyContext(p->egl.display, p->egl.context);
 
+    close(p->drm_params.render_fd);
+
     if (p->kms) {
         kms_destroy(p->kms);
         p->kms = 0;
     }
 }
 
-// If the OSD plane supports ARGB we want to use that, but if it doesn't we fall
-// back on XRGB. If the driver does not support atomic there is no particular
-// reason to be using ARGB (drmprime hwdec will not work without atomic,
-// anyway), so we fall back to XRGB (another reason is that we do not have the
-// convenient atomic_ctx and its convenient plane fields).
+// If the draw plane supports ARGB we want to use that, but if it doesn't we
+// fall back on XRGB. If we do not have atomic there is no particular reason to
+// be using ARGB (drmprime hwdec will not work without atomic, anyway), so we
+// fall back to XRGB (another reason is that we do not have the convenient
+// atomic_ctx and its convenient plane fields).
 static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t xrgb_format)
 {
     struct priv *p = ctx->priv;
 
     if (!p->kms->atomic_context) {
         p->gbm_format = xrgb_format;
-        MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use %s for OSD plane.\n",
+        MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use %s for draw plane.\n",
                    gbm_format_to_string(xrgb_format));
         return true;
     }
 
     drmModePlane *drmplane =
-        drmModeGetPlane(p->kms->fd, p->kms->atomic_context->osd_plane->id);
+        drmModeGetPlane(p->kms->fd, p->kms->atomic_context->draw_plane->id);
     bool have_argb = false;
     bool have_xrgb = false;
     bool result = false;
@@ -557,11 +552,11 @@ static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t
 
     if (have_argb) {
         p->gbm_format = argb_format;
-        MP_VERBOSE(ctx->vo, "%s supported by OSD plane.\n", gbm_format_to_string(argb_format));
+        MP_VERBOSE(ctx->vo, "%s supported by draw plane.\n", gbm_format_to_string(argb_format));
         result = true;
     } else if (have_xrgb) {
         p->gbm_format = xrgb_format;
-        MP_VERBOSE(ctx->vo, "%s not supported by OSD plane: Falling back to %s.\n",
+        MP_VERBOSE(ctx->vo, "%s not supported by draw plane: Falling back to %s.\n",
                    gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format));
         result = true;
     }
@@ -591,25 +586,25 @@ static bool drm_egl_init(struct ra_ctx *ctx)
     MP_VERBOSE(ctx, "Initializing KMS\n");
     p->kms = kms_create(ctx->log, ctx->vo->opts->drm_opts->drm_connector_spec,
                         ctx->vo->opts->drm_opts->drm_mode_id,
-                        ctx->vo->opts->drm_opts->drm_osd_plane_id,
-                        ctx->vo->opts->drm_opts->drm_video_plane_id);
+                        ctx->vo->opts->drm_opts->drm_draw_plane,
+                        ctx->vo->opts->drm_opts->drm_drmprime_video_plane);
     if (!p->kms) {
         MP_ERR(ctx, "Failed to create KMS.\n");
         return false;
     }
 
-    if (ctx->vo->opts->drm_opts->drm_osd_size.wh_valid) {
+    if (ctx->vo->opts->drm_opts->drm_draw_surface_size.wh_valid) {
         if (p->kms->atomic_context) {
-            p->osd_size.width = ctx->vo->opts->drm_opts->drm_osd_size.w;
-            p->osd_size.height = ctx->vo->opts->drm_opts->drm_osd_size.h;
+            p->draw_surface_size.width = ctx->vo->opts->drm_opts->drm_draw_surface_size.w;
+            p->draw_surface_size.height = ctx->vo->opts->drm_opts->drm_draw_surface_size.h;
         } else {
-            p->osd_size.width = p->kms->mode.hdisplay;
-            p->osd_size.height = p->kms->mode.vdisplay;
-            MP_WARN(ctx, "Setting OSD size is only available with DRM atomic, defaulting to screen resolution\n");
+            p->draw_surface_size.width = p->kms->mode.mode.hdisplay;
+            p->draw_surface_size.height = p->kms->mode.mode.vdisplay;
+            MP_WARN(ctx, "Setting draw plane size is only available with DRM atomic, defaulting to screen resolution\n");
         }
     } else {
-        p->osd_size.width = p->kms->mode.hdisplay;
-        p->osd_size.height = p->kms->mode.vdisplay;
+        p->draw_surface_size.width = p->kms->mode.mode.hdisplay;
+        p->draw_surface_size.height = p->kms->mode.mode.vdisplay;
     }
 
     uint32_t argb_format;
@@ -623,7 +618,7 @@ static bool drm_egl_init(struct ra_ctx *ctx)
     }
 
     if (!probe_gbm_format(ctx, argb_format, xrgb_format)) {
-        MP_ERR(ctx->vo, "No suitable format found on DRM primary plane (tried: %s and %s).\n",
+        MP_ERR(ctx->vo, "No suitable format found on draw plane (tried: %s and %s).\n",
                gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format));
         return false;
     }
@@ -671,6 +666,20 @@ static bool drm_egl_init(struct ra_ctx *ctx)
     p->drm_params.connector_id = p->kms->connector->connector_id;
     if (p->kms->atomic_context)
         p->drm_params.atomic_request_ptr = &p->kms->atomic_context->request;
+    char *rendernode_path = drmGetRenderDeviceNameFromFd(p->kms->fd);
+    if (rendernode_path) {
+        MP_VERBOSE(ctx, "Opening render node \"%s\"\n", rendernode_path);
+        p->drm_params.render_fd = open(rendernode_path, O_RDWR | O_CLOEXEC);
+        if (p->drm_params.render_fd < 0) {
+            MP_WARN(ctx, "Cannot open render node \"%s\": %s. VAAPI hwdec will be disabled\n",
+                    rendernode_path, mp_strerror(errno));
+        }
+        free(rendernode_path);
+    } else {
+        p->drm_params.render_fd = -1;
+        MP_VERBOSE(ctx, "Could not find path to render node. VAAPI hwdec will be disabled\n");
+    }
+
     struct ra_gl_ctx_params params = {
         .swap_buffers = drm_egl_swap_buffers,
         .external_swapchain = p->kms->atomic_context ? &drm_atomic_swapchain :
@@ -680,7 +689,7 @@ static bool drm_egl_init(struct ra_ctx *ctx)
         return false;
 
     ra_add_native_resource(ctx->ra, "drm_params", &p->drm_params);
-    ra_add_native_resource(ctx->ra, "drm_osd_size", &p->osd_size);
+    ra_add_native_resource(ctx->ra, "drm_draw_surface_size", &p->draw_surface_size);
 
     return true;
 }
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index f80c14500a..fee1f83f98 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -19,28 +19,53 @@
 
 /*
  * This hwdec implements an optimized output path using CUDA->OpenGL
- * interop for frame data that is stored in CUDA device memory.
- * Although it is not explicit in the code here, the only practical way
- * to get data in this form is from the 'cuvid' decoder (aka NvDecode).
- *
- * For now, cuvid/NvDecode will always return images in NV12 format, even
- * when decoding 10bit streams (there is some hardware dithering going on).
+ * or CUDA->Vulkan interop for frame data that is stored in CUDA
+ * device memory. Although it is not explicit in the code here, the
+ * only practical way to get data in this form is from the
+ * nvdec/cuvid decoder.
  */
 
+#include <unistd.h>
+
 #include <ffnvcodec/dynlink_loader.h>
 #include <libavutil/hwcontext.h>
 #include <libavutil/hwcontext_cuda.h>
 
 #include "video/out/gpu/hwdec.h"
+#include "video/out/gpu/utils.h"
 #include "formats.h"
 #include "options/m_config.h"
+#if HAVE_GL
 #include "ra_gl.h"
+#endif
+#if HAVE_VULKAN
+#include "video/out/vulkan/formats.h"
+#include "video/out/vulkan/ra_vk.h"
+#include "video/out/vulkan/utils.h"
+#endif
+
+#if HAVE_WIN32_DESKTOP
+#include <versionhelpers.h>
+#endif
 
 struct priv_owner {
     struct mp_hwdec_ctx hwctx;
     CudaFunctions *cu;
     CUcontext display_ctx;
     CUcontext decode_ctx;
+
+    bool is_gl;
+    bool is_vk;
+};
+
+struct ext_buf { // CUDA import state stored as a buffer's user data
+#if HAVE_WIN32_DESKTOP
+    HANDLE handle;        // exported memory handle (from mem_info.mem_handle)
+#else
+    int fd;               // exported memory fd (from mem_info.mem_fd)
+#endif
+    CUexternalMemory mem; // filled in by cuImportExternalMemory()
+    CUdeviceptr buf;      // filled in by cuExternalMemoryGetMappedBuffer()
+};
 
 struct priv {
@@ -49,6 +74,9 @@ struct priv {
     CUarray cu_array[4];
 
     CUcontext display_ctx;
+
+    struct ra_buf_params buf_params[4];
+    struct ra_buf_pool buf_pool[4];
 };
 
 static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func)
@@ -81,17 +109,34 @@ static int cuda_init(struct ra_hwdec *hw)
     CUdevice display_dev;
     AVBufferRef *hw_device_ctx = NULL;
     CUcontext dummy;
-    unsigned int device_count;
     int ret = 0;
     struct priv_owner *p = hw->priv;
     CudaFunctions *cu;
 
-    if (!ra_is_gl(hw->ra))
-        return -1;
+#if HAVE_GL
+    p->is_gl = ra_is_gl(hw->ra);
+    if (p->is_gl) {
+        GL *gl = ra_gl_get(hw->ra);
+        if (gl->version < 210 && gl->es < 300) {
+            MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
+            return -1;
+        }
+    }
+#endif
+
+#if HAVE_VULKAN
+    p->is_vk = ra_vk_get(hw->ra) != NULL;
+    if (p->is_vk) {
+        if (!ra_vk_get(hw->ra)->has_ext_external_memory_export) {
+            MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires the %s extension\n",
+                       MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
+            return -1;
+        }
+    }
+#endif
 
-    GL *gl = ra_gl_get(hw->ra);
-    if (gl->version < 210 && gl->es < 300) {
-        MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
+    if (!p->is_gl && !p->is_vk) {
+        MP_VERBOSE(hw, "CUDA hwdec only works with OpenGL or Vulkan backends.\n");
         return -1;
     }
 
@@ -102,46 +147,96 @@ static int cuda_init(struct ra_hwdec *hw)
     }
     cu = p->cu;
 
+    if (p->is_vk && !cu->cuImportExternalMemory) {
+        MP_ERR(hw, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
+        return -1;
+    }
+
     ret = CHECK_CU(cu->cuInit(0));
     if (ret < 0)
-        goto error;
+        return -1;
 
     // Allocate display context
-    ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
-                                      CU_GL_DEVICE_LIST_ALL));
-    if (ret < 0)
-        goto error;
+    if (p->is_gl) {
+        unsigned int device_count;
+        ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
+                                          CU_GL_DEVICE_LIST_ALL));
+        if (ret < 0)
+            return -1;
 
-    ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
-                                   display_dev));
-    if (ret < 0)
-        goto error;
+        ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                       display_dev));
+        if (ret < 0)
+            return -1;
 
-    p->decode_ctx = p->display_ctx;
+        p->decode_ctx = p->display_ctx;
 
-    int decode_dev_idx = -1;
-    mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice,
-                       &decode_dev_idx);
+        int decode_dev_idx = -1;
+        mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice,
+                           &decode_dev_idx);
 
-    if (decode_dev_idx > -1) {
-        CUdevice decode_dev;
-        ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx));
-        if (ret < 0)
-            goto error;
+        if (decode_dev_idx > -1) {
+            CUdevice decode_dev;
+            ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx));
+            if (ret < 0)
+                goto error;
+
+            if (decode_dev != display_dev) {
+                MP_INFO(hw, "Using separate decoder and display devices\n");
 
-        if (decode_dev != display_dev) {
-            MP_INFO(hw, "Using separate decoder and display devices\n");
+                // Pop the display context. We won't use it again during init()
+                ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+                if (ret < 0)
+                    return -1;
+
+                ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                               decode_dev));
+                if (ret < 0)
+                    return -1;
+            }
+        }
+    } else if (p->is_vk) {
+#if HAVE_VULKAN
+        uint8_t vk_uuid[VK_UUID_SIZE];
+        struct mpvk_ctx *vk = ra_vk_get(hw->ra);
+
+        mpvk_get_phys_device_uuid(vk, vk_uuid);
+
+        int count;
+        ret = CHECK_CU(cu->cuDeviceGetCount(&count));
+        if (ret < 0)
+            return -1;
 
-            // Pop the display context. We won't use it again during init()
-            ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+        display_dev = -1;
+        for (int i = 0; i < count; i++) {
+            CUdevice dev;
+            ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
             if (ret < 0)
-                goto error;
+                continue;
 
-            ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
-                                           decode_dev));
+            CUuuid uuid;
+            ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
             if (ret < 0)
-                goto error;
+                continue;
+
+            if (memcmp(vk_uuid, uuid.bytes, VK_UUID_SIZE) == 0) {
+                display_dev = dev;
+                break;
+            }
         }
+
+        if (display_dev == -1) {
+            MP_ERR(hw, "Could not match Vulkan display device in CUDA.\n");
+            return -1;
+        }
+
+        ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                       display_dev));
+        if (ret < 0)
+            return -1;
+
+        p->decode_ctx = p->display_ctx;
+#endif
     }
 
     hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
@@ -197,6 +292,106 @@ static void cuda_uninit(struct ra_hwdec *hw)
 #undef CHECK_CU
 #define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
 
+#if HAVE_VULKAN
+// Get a pool buffer for plane n; import its memory into CUDA on first use.
+static struct ra_buf *cuda_buf_pool_get(struct ra_hwdec_mapper *mapper, int n)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    struct ext_buf *ebuf = NULL;
+    int ret = 0;
+
+    struct ra_buf_pool *pool = &p->buf_pool[n];
+    struct ra_buf *buf = ra_buf_pool_get(mapper->ra, pool, &p->buf_params[n]);
+    if (!buf)
+        goto error;
+
+    if (!ra_vk_buf_get_user_data(buf)) {
+        ebuf = talloc_zero(NULL, struct ext_buf);
+        struct vk_external_mem mem_info;
+
+        if (!ra_vk_buf_get_external_info(mapper->ra, buf, &mem_info))
+            goto error;
+
+#if HAVE_WIN32_DESKTOP
+        ebuf->handle = mem_info.mem_handle;
+        MP_DBG(mapper, "vk_external_info[%d][%d]: %p %zu %zu\n", n, pool->index, ebuf->handle, mem_info.size, mem_info.offset);
+#else
+        ebuf->fd = mem_info.mem_fd;
+        MP_DBG(mapper, "vk_external_info[%d][%d]: %d %zu %zu\n", n, pool->index, ebuf->fd, mem_info.size, mem_info.offset);
+#endif
+
+        CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
+#if HAVE_WIN32_DESKTOP
+            .type = IsWindows8OrGreater()
+                ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
+                : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
+            .handle.win32.handle = ebuf->handle,
+#else
+            .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
+            .handle.fd = ebuf->fd,
+#endif
+            .size = mem_info.mem_size,
+            .flags = 0,
+        };
+        ret = CHECK_CU(cu->cuImportExternalMemory(&ebuf->mem, &ext_desc));
+        if (ret < 0)
+            goto error;
+
+        CUDA_EXTERNAL_MEMORY_BUFFER_DESC buf_desc = {
+            .offset = mem_info.offset,
+            .size = mem_info.size,
+            .flags = 0,
+        };
+        ret = CHECK_CU(cu->cuExternalMemoryGetMappedBuffer(&ebuf->buf, ebuf->mem, &buf_desc));
+        if (ret < 0)
+            goto error;
+
+        ra_vk_buf_set_user_data(buf, ebuf);
+    }
+    return buf;
+
+error:
+    // ebuf is attached to buf only after full success, so it is still ours.
+    talloc_free(ebuf);
+    MP_ERR(mapper, "cuda_buf_pool_get failed\n");
+    return NULL;
+}
+
+static void cuda_buf_pool_uninit(struct ra_hwdec_mapper *mapper, int n)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    // Undo the CUDA import of each buffer in plane n's pool, then free the pool.
+    struct ra_buf_pool *pool = &p->buf_pool[n];
+    for (int i = 0; i < pool->num_buffers; i++) {
+        struct ra_buf *buf = pool->buffers[i];
+        struct ext_buf *ebuf = ra_vk_buf_get_user_data(buf);
+        if (ebuf) {
+            if (ebuf->mem > 0) {
+                CHECK_CU(cu->cuDestroyExternalMemory(ebuf->mem));
+#if HAVE_WIN32_DESKTOP
+            } // closes "if (ebuf->mem > 0)"; the handle check below is independent
+            if (ebuf->handle) {
+                // Handle must always be closed by us.
+                CloseHandle(ebuf->handle);
+            }
+#else
+            } else if (ebuf->fd > -1) { // only reached when mem was not imported
+                // fd should only be closed if external memory was not imported
+                close(ebuf->fd);
+            }
+#endif
+        }
+        talloc_free(ebuf); // also reached with ebuf == NULL
+        ra_vk_buf_set_user_data(buf, NULL);
+    }
+    ra_buf_pool_uninit(mapper->ra, pool);
+}
+#endif // HAVE_VULKAN
+
 static int mapper_init(struct ra_hwdec_mapper *mapper)
 {
     struct priv_owner *p_owner = mapper->owner->priv;
@@ -243,27 +438,39 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
             goto error;
         }
 
-        GLuint texture;
-        GLenum target;
-        ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
+        if (p_owner->is_gl) {
+#if HAVE_GL
+            GLuint texture;
+            GLenum target;
+            ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
 
-        ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target,
-                                                     CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
-        if (ret < 0)
-            goto error;
+            ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target,
+                                                         CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
+            if (ret < 0)
+                goto error;
 
-        ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0));
-        if (ret < 0)
-            goto error;
+            ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0));
+            if (ret < 0)
+                goto error;
 
-        ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n],
-                                                               0, 0));
-        if (ret < 0)
-            goto error;
+            ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n],
+                                                                   0, 0));
+            if (ret < 0)
+                goto error;
 
-        ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0));
-        if (ret < 0)
-            goto error;
+            ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0));
+            if (ret < 0)
+                goto error;
+#endif
+        } else if (p_owner->is_vk) {
+            struct ra_buf_params buf_params = {
+                .type = RA_BUF_TYPE_SHARED_MEMORY,
+                .size = mp_image_plane_h(&p->layout, n) *
+                        mp_image_plane_w(&p->layout, n) *
+                        mapper->tex[n]->params.format->pixel_size,
+            };
+            p->buf_params[n] = buf_params;
+        }
     }
 
  error:
@@ -288,6 +495,10 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper)
             CHECK_CU(cu->cuGraphicsUnregisterResource(p->cu_res[n]));
         p->cu_res[n] = 0;
         ra_tex_free(mapper->ra, &mapper->tex[n]);
+
+#if HAVE_VULKAN
+        cuda_buf_pool_uninit(mapper, n);
+#endif
     }
     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 }
@@ -303,28 +514,60 @@ static int mapper_map(struct ra_hwdec_mapper *mapper)
     CudaFunctions *cu = p_owner->cu;
     CUcontext dummy;
     int ret = 0, eret = 0;
+    bool is_gl = p_owner->is_gl;
+    bool is_vk = p_owner->is_vk;
 
     ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
     if (ret < 0)
         return ret;
 
     for (int n = 0; n < p->layout.num_planes; n++) {
+        struct ra_buf *buf = NULL;
+
         CUDA_MEMCPY2D cpy = {
             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
-            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
             .srcDevice     = (CUdeviceptr)mapper->src->planes[n],
             .srcPitch      = mapper->src->stride[n],
             .srcY          = 0,
-            .dstArray      = p->cu_array[n],
             .WidthInBytes  = mp_image_plane_w(&p->layout, n) *
                              mapper->tex[n]->params.format->pixel_size,
             .Height        = mp_image_plane_h(&p->layout, n),
         };
+
+        if (is_gl) {
+            cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+            cpy.dstArray = p->cu_array[n];
+        } else if (is_vk) {
+#if HAVE_VULKAN
+            buf = cuda_buf_pool_get(mapper, n);
+            if (!buf) {
+                // cuda_buf_pool_get() returns NULL on failure; bail out
+                // instead of dereferencing its user data below.
+                ret = -1;
+                goto error;
+            }
+            struct ext_buf *ebuf = ra_vk_buf_get_user_data(buf);
+
+            cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+            cpy.dstDevice = ebuf->buf;
+            cpy.dstPitch  = mp_image_plane_w(&p->layout, n) *
+                            mapper->tex[n]->params.format->pixel_size;
+#endif
+        }
+
         ret = CHECK_CU(cu->cuMemcpy2D(&cpy));
         if (ret < 0)
             goto error;
-    }
 
+        if (is_vk) {
+            struct ra_tex_upload_params params = {
+                .tex = mapper->tex[n],
+                .invalidate = true,
+                .buf = buf,
+            };
+            mapper->ra->fns->tex_upload(mapper->ra, &params);
+        }
+    }
 
  error:
    eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
diff --git a/video/out/opengl/hwdec_drmprime_drm.c b/video/out/opengl/hwdec_drmprime_drm.c
index 24753067d6..fd3d383c55 100644
--- a/video/out/opengl/hwdec_drmprime_drm.c
+++ b/video/out/opengl/hwdec_drmprime_drm.c
@@ -114,14 +114,17 @@ static void disable_video_plane(struct ra_hwdec *hw)
     if (!p->ctx)
         return;
 
-    // Disabling video plane is needed on some devices when using the
-    // primary plane for video. Primary buffer can't be active with no
-    // framebuffer associated. So we need this function to commit it
-    // right away as mpv will free all framebuffers on playback end.
+    if (!p->ctx->drmprime_video_plane)
+        return;
+
+    // Disabling the drmprime video plane is needed on some devices when using
+    // the primary plane for video. Primary buffer can't be active with no
+    // framebuffer associated. So we need this function to commit it right away
+    // as mpv will free all framebuffers on playback end.
     drmModeAtomicReqPtr request = drmModeAtomicAlloc();
     if (request) {
-        drm_object_set_property(request, p->ctx->video_plane, "FB_ID", 0);