diff options
Diffstat (limited to 'video/out/opengl')
-rw-r--r-- | video/out/opengl/common.c | 3 | ||||
-rw-r--r-- | video/out/opengl/context_drm_egl.c | 205 | ||||
-rw-r--r-- | video/out/opengl/hwdec_cuda.c | 353 | ||||
-rw-r--r-- | video/out/opengl/hwdec_drmprime_drm.c | 67 | ||||
-rw-r--r-- | video/out/opengl/hwdec_vaegl.c | 11 | ||||
-rw-r--r-- | video/out/opengl/ra_gl.c | 10 |
6 files changed, 456 insertions, 193 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index 4b0cbcc1c4..00692f0350 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -42,7 +42,8 @@ static bool is_software_gl(GL *gl) strcmp(renderer, "Software Rasterizer") == 0 || strstr(renderer, "llvmpipe") || strcmp(vendor, "Microsoft Corporation") == 0 || - strcmp(renderer, "Mesa X11") == 0; + strcmp(renderer, "Mesa X11") == 0 || + strcmp(renderer, "Apple Software Renderer") == 0; } static void GLAPIENTRY dummy_glBindFramebuffer(GLenum target, GLuint framebuffer) diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c index 1ba7f5dd05..6aa3d95e79 100644 --- a/video/out/opengl/context_drm_egl.c +++ b/video/out/opengl/context_drm_egl.c @@ -81,7 +81,7 @@ struct priv { struct vt_switcher vt_switcher; struct mpv_opengl_drm_params drm_params; - struct mpv_opengl_drm_osd_size osd_size; + struct mpv_opengl_drm_draw_surface_size draw_surface_size; }; // Not general. Limited to only the formats being used in this module @@ -195,11 +195,11 @@ static bool init_gbm(struct ra_ctx *ctx) } MP_VERBOSE(ctx->vo, "Initializing GBM surface (%d x %d)\n", - p->osd_size.width, p->osd_size.height); + p->draw_surface_size.width, p->draw_surface_size.height); p->gbm.surface = gbm_surface_create( p->gbm.device, - p->osd_size.width, - p->osd_size.height, + p->draw_surface_size.width, + p->draw_surface_size.height, p->gbm_format, GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING); if (!p->gbm.surface) { @@ -252,53 +252,54 @@ static bool crtc_setup_atomic(struct ra_ctx *ctx) struct priv *p = ctx->priv; struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; + if (!drm_atomic_save_old_state(atomic_ctx)) { + MP_WARN(ctx->vo, "Failed to save old DRM atomic state\n"); + } + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); - if (request) { - if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->kms->crtc_id) < 0) { - MP_ERR(ctx->vo, "Could not set CRTC_ID on connector\n"); - return false; - } + if (!request) { + MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); + return false; + } - uint32_t blob_id; - if (drmModeCreatePropertyBlob(p->kms->fd, &p->kms->mode, sizeof(drmModeModeInfo), - &blob_id) != 0) { - MP_ERR(ctx->vo, "Failed to create DRM mode blob\n"); - return false; - } - if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", blob_id) < 0) { - MP_ERR(ctx->vo, "Could not set MODE_ID on crtc\n"); - return false; - } - if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) { - MP_ERR(ctx->vo, "Could not set ACTIVE on crtc\n"); - return false; - } + if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->kms->crtc_id) < 0) { + MP_ERR(ctx->vo, "Could not set CRTC_ID on connector\n"); + return false; + } - drm_object_set_property(request, atomic_ctx->osd_plane, "FB_ID", p->fb->id); - drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_ID", p->kms->crtc_id); - drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_X", 0); - drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_Y", 0); - drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_W", p->osd_size.width << 16); - drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_H", p->osd_size.height << 16); - drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_X", 0); - drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_Y", 0); - drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_W", p->kms->mode.hdisplay); - drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_H", p->kms->mode.vdisplay); - - int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); - if (ret) { - MP_ERR(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); - drmModeAtomicFree(request); - return false; - } + if (!drm_mode_ensure_blob(p->kms->fd, &p->kms->mode)) { + MP_ERR(ctx->vo, "Failed to create DRM mode blob\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", p->kms->mode.blob_id) < 0) { + MP_ERR(ctx->vo, "Could not set MODE_ID on crtc\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) { + MP_ERR(ctx->vo, "Could not set ACTIVE on crtc\n"); + goto err; + } - drmModeAtomicFree(request); + drm_object_set_property(request, atomic_ctx->draw_plane, "FB_ID", p->fb->id); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_ID", p->kms->crtc_id); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_X", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_Y", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_W", p->draw_surface_size.width << 16); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_H", p->draw_surface_size.height << 16); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_X", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_Y", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_W", p->kms->mode.mode.hdisplay); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_H", p->kms->mode.mode.vdisplay); - return ret == 0; - } else { - MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); - } + int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); + if (ret) + MP_ERR(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); + drmModeAtomicFree(request); + return ret == 0; + + err: + drmModeAtomicFree(request); return false; } @@ -308,32 +309,22 @@ static bool crtc_release_atomic(struct ra_ctx *ctx) struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; drmModeAtomicReqPtr request = drmModeAtomicAlloc(); - if (request) { - drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", p->old_crtc->crtc_id); - - uint32_t blob_id; - if (drmModeCreatePropertyBlob(p->kms->fd, &p->old_crtc->mode, sizeof(drmModeModeInfo), - &blob_id) != 0) { - MP_ERR(ctx->vo, "Failed to create DRM mode blob\n"); - return false; - } - drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", blob_id); - drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1); - drm_object_set_property(request, atomic_ctx->osd_plane, "FB_ID", p->old_crtc->buffer_id); + if (!request) { + MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); + return false; + } - int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); + if (!drm_atomic_restore_old_state(request, atomic_ctx)) { + MP_WARN(ctx->vo, "Got error while restoring old state\n"); + } - if (ret) - MP_WARN(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); + int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); - drmModeAtomicFree(request); + if (ret) + MP_WARN(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); - return ret == 0; - } else { - MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); - } - - return false; + drmModeAtomicFree(request); + return ret == 0; } static bool crtc_setup(struct ra_ctx *ctx) @@ -341,16 +332,16 @@ static bool crtc_setup(struct ra_ctx *ctx) struct priv *p = ctx->priv; if (p->active) return true; - p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); if (p->kms->atomic_context) { int ret = crtc_setup_atomic(ctx); p->active = true; return ret; } else { + p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb->id, 0, 0, &p->kms->connector->connector_id, 1, - &p->kms->mode); + &p->kms->mode.mode); p->active = true; return ret == 0; } @@ -373,19 +364,21 @@ static void crtc_release(struct ra_ctx *ctx) } } - if (p->old_crtc) { - if (p->kms->atomic_context) { + if (p->kms->atomic_context) { + if (p->kms->atomic_context->old_state.saved) { if (!crtc_release_atomic(ctx)) MP_ERR(ctx->vo, "Failed to restore previous mode\n"); - } else { + } + } else { + if (p->old_crtc) { drmModeSetCrtc(p->kms->fd, p->old_crtc->crtc_id, p->old_crtc->buffer_id, p->old_crtc->x, p->old_crtc->y, &p->kms->connector->connector_id, 1, &p->old_crtc->mode); + drmModeFreeCrtc(p->old_crtc); + p->old_crtc = NULL; } - drmModeFreeCrtc(p->old_crtc); - p->old_crtc = NULL; } } @@ -453,9 +446,9 @@ static void drm_egl_swap_buffers(struct ra_ctx *ctx) update_framebuffer_from_bo(ctx, p->gbm.next_bo); if (atomic_ctx) { - drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "FB_ID", p->fb->id); - drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "CRTC_ID", atomic_ctx->crtc->id); - drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "ZPOS", 1); + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "FB_ID", p->fb->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "CRTC_ID", atomic_ctx->crtc->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "ZPOS", 1); ret = drmModeAtomicCommit(p->kms->fd, atomic_ctx->request, DRM_MODE_ATOMIC_NONBLOCK | DRM_MODE_PAGE_FLIP_EVENT, NULL); @@ -520,30 +513,32 @@ static void drm_egl_uninit(struct ra_ctx *ctx) p->egl.context = EGL_NO_CONTEXT; eglDestroyContext(p->egl.display, p->egl.context); + close(p->drm_params.render_fd); + if (p->kms) { kms_destroy(p->kms); p->kms = 0; } } -// If the OSD plane supports ARGB we want to use that, but if it doesn't we fall -// back on XRGB. If the driver does not support atomic there is no particular -// reason to be using ARGB (drmprime hwdec will not work without atomic, -// anyway), so we fall back to XRGB (another reason is that we do not have the -// convenient atomic_ctx and its convenient plane fields). +// If the draw plane supports ARGB we want to use that, but if it doesn't we +// fall back on XRGB. If we do not have atomic there is no particular reason to +// be using ARGB (drmprime hwdec will not work without atomic, anyway), so we +// fall back to XRGB (another reason is that we do not have the convenient +// atomic_ctx and its convenient plane fields). static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t xrgb_format) { struct priv *p = ctx->priv; if (!p->kms->atomic_context) { p->gbm_format = xrgb_format; - MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use %s for OSD plane.\n", + MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use %s for draw plane.\n", gbm_format_to_string(xrgb_format)); return true; } drmModePlane *drmplane = - drmModeGetPlane(p->kms->fd, p->kms->atomic_context->osd_plane->id); + drmModeGetPlane(p->kms->fd, p->kms->atomic_context->draw_plane->id); bool have_argb = false; bool have_xrgb = false; bool result = false; @@ -557,11 +552,11 @@ static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t if (have_argb) { p->gbm_format = argb_format; - MP_VERBOSE(ctx->vo, "%s supported by OSD plane.\n", gbm_format_to_string(argb_format)); + MP_VERBOSE(ctx->vo, "%s supported by draw plane.\n", gbm_format_to_string(argb_format)); result = true; } else if (have_xrgb) { p->gbm_format = xrgb_format; - MP_VERBOSE(ctx->vo, "%s not supported by OSD plane: Falling back to %s.\n", + MP_VERBOSE(ctx->vo, "%s not supported by draw plane: Falling back to %s.\n", gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); result = true; } @@ -591,25 +586,25 @@ static bool drm_egl_init(struct ra_ctx *ctx) MP_VERBOSE(ctx, "Initializing KMS\n"); p->kms = kms_create(ctx->log, ctx->vo->opts->drm_opts->drm_connector_spec, ctx->vo->opts->drm_opts->drm_mode_id, - ctx->vo->opts->drm_opts->drm_osd_plane_id, - ctx->vo->opts->drm_opts->drm_video_plane_id); + ctx->vo->opts->drm_opts->drm_draw_plane, + ctx->vo->opts->drm_opts->drm_drmprime_video_plane); if (!p->kms) { MP_ERR(ctx, "Failed to create KMS.\n"); return false; } - if (ctx->vo->opts->drm_opts->drm_osd_size.wh_valid) { + if (ctx->vo->opts->drm_opts->drm_draw_surface_size.wh_valid) { if (p->kms->atomic_context) { - p->osd_size.width = ctx->vo->opts->drm_opts->drm_osd_size.w; - p->osd_size.height = ctx->vo->opts->drm_opts->drm_osd_size.h; + p->draw_surface_size.width = ctx->vo->opts->drm_opts->drm_draw_surface_size.w; + p->draw_surface_size.height = ctx->vo->opts->drm_opts->drm_draw_surface_size.h; } else { - p->osd_size.width = p->kms->mode.hdisplay; - p->osd_size.height = p->kms->mode.vdisplay; - MP_WARN(ctx, "Setting OSD size is only available with DRM atomic, defaulting to screen resolution\n"); + p->draw_surface_size.width = p->kms->mode.mode.hdisplay; + p->draw_surface_size.height = p->kms->mode.mode.vdisplay; + MP_WARN(ctx, "Setting draw plane size is only available with DRM atomic, defaulting to screen resolution\n"); } } else { - p->osd_size.width = p->kms->mode.hdisplay; - p->osd_size.height = p->kms->mode.vdisplay; + p->draw_surface_size.width = p->kms->mode.mode.hdisplay; + p->draw_surface_size.height = p->kms->mode.mode.vdisplay; } uint32_t argb_format; @@ -623,7 +618,7 @@ static bool drm_egl_init(struct ra_ctx *ctx) } if (!probe_gbm_format(ctx, argb_format, xrgb_format)) { - MP_ERR(ctx->vo, "No suitable format found on DRM primary plane (tried: %s and %s).\n", + MP_ERR(ctx->vo, "No suitable format found on draw plane (tried: %s and %s).\n", gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); return false; } @@ -671,6 +666,20 @@ static bool drm_egl_init(struct ra_ctx *ctx) p->drm_params.connector_id = p->kms->connector->connector_id; if (p->kms->atomic_context) p->drm_params.atomic_request_ptr = &p->kms->atomic_context->request; + char *rendernode_path = drmGetRenderDeviceNameFromFd(p->kms->fd); + if (rendernode_path) { + MP_VERBOSE(ctx, "Opening render node \"%s\"\n", rendernode_path); + p->drm_params.render_fd = open(rendernode_path, O_RDWR | O_CLOEXEC); + if (p->drm_params.render_fd < 0) { + MP_WARN(ctx, "Cannot open render node \"%s\": %s. VAAPI hwdec will be disabled\n", + rendernode_path, mp_strerror(errno)); + } + free(rendernode_path); + } else { + p->drm_params.render_fd = -1; + MP_VERBOSE(ctx, "Could not find path to render node. VAAPI hwdec will be disabled\n"); + } + struct ra_gl_ctx_params params = { .swap_buffers = drm_egl_swap_buffers, .external_swapchain = p->kms->atomic_context ? &drm_atomic_swapchain : @@ -680,7 +689,7 @@ static bool drm_egl_init(struct ra_ctx *ctx) return false; ra_add_native_resource(ctx->ra, "drm_params", &p->drm_params); - ra_add_native_resource(ctx->ra, "drm_osd_size", &p->osd_size); + ra_add_native_resource(ctx->ra, "drm_draw_surface_size", &p->draw_surface_size); return true; } diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index f80c14500a..fee1f83f98 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -19,28 +19,53 @@ /* * This hwdec implements an optimized output path using CUDA->OpenGL - * interop for frame data that is stored in CUDA device memory. - * Although it is not explicit in the code here, the only practical way - * to get data in this form is from the 'cuvid' decoder (aka NvDecode). - * - * For now, cuvid/NvDecode will always return images in NV12 format, even - * when decoding 10bit streams (there is some hardware dithering going on). + * or CUDA->Vulkan interop for frame data that is stored in CUDA + * device memory. Although it is not explicit in the code here, the + * only practical way to get data in this form is from the + * nvdec/cuvid decoder. */ +#include <unistd.h> + #include <ffnvcodec/dynlink_loader.h> #include <libavutil/hwcontext.h> #include <libavutil/hwcontext_cuda.h> #include "video/out/gpu/hwdec.h" +#include "video/out/gpu/utils.h" #include "formats.h" #include "options/m_config.h" +#if HAVE_GL #include "ra_gl.h" +#endif +#if HAVE_VULKAN +#include "video/out/vulkan/formats.h" +#include "video/out/vulkan/ra_vk.h" +#include "video/out/vulkan/utils.h" +#endif + +#if HAVE_WIN32_DESKTOP +#include <versionhelpers.h> +#endif struct priv_owner { struct mp_hwdec_ctx hwctx; CudaFunctions *cu; CUcontext display_ctx; CUcontext decode_ctx; + + bool is_gl; + bool is_vk; +}; + +struct ext_buf { +#if HAVE_WIN32_DESKTOP + HANDLE handle; +#else + int fd; +#endif + CUexternalMemory mem; + CUdeviceptr buf; }; struct priv { @@ -49,6 +74,9 @@ struct priv { CUarray cu_array[4]; CUcontext display_ctx; + + struct ra_buf_params buf_params[4]; + struct ra_buf_pool buf_pool[4]; }; static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func) @@ -81,17 +109,34 @@ static int cuda_init(struct ra_hwdec *hw) CUdevice display_dev; AVBufferRef *hw_device_ctx = NULL; CUcontext dummy; - unsigned int device_count; int ret = 0; struct priv_owner *p = hw->priv; CudaFunctions *cu; - if (!ra_is_gl(hw->ra)) - return -1; +#if HAVE_GL + p->is_gl = ra_is_gl(hw->ra); + if (p->is_gl) { + GL *gl = ra_gl_get(hw->ra); + if (gl->version < 210 && gl->es < 300) { + MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n"); + return -1; + } + } +#endif + +#if HAVE_VULKAN + p->is_vk = ra_vk_get(hw->ra) != NULL; + if (p->is_vk) { + if (!ra_vk_get(hw->ra)->has_ext_external_memory_export) { + MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires the %s extension\n", + MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME); + return -1; + } + } +#endif - GL *gl = ra_gl_get(hw->ra); - if (gl->version < 210 && gl->es < 300) { - MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n"); + if (!p->is_gl && !p->is_vk) { + MP_VERBOSE(hw, "CUDA hwdec only works with OpenGL or Vulkan backends.\n"); return -1; } @@ -102,46 +147,96 @@ static int cuda_init(struct ra_hwdec *hw) } cu = p->cu; + if (p->is_vk && !cu->cuImportExternalMemory) { + MP_ERR(hw, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n"); + return -1; + } + ret = CHECK_CU(cu->cuInit(0)); if (ret < 0) - goto error; + return -1; // Allocate display context - ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, - CU_GL_DEVICE_LIST_ALL)); - if (ret < 0) - goto error; + if (p->is_gl) { + unsigned int device_count; + ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, + CU_GL_DEVICE_LIST_ALL)); + if (ret < 0) + return -1; - ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - display_dev)); - if (ret < 0) - goto error; + ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + display_dev)); + if (ret < 0) + return -1; - p->decode_ctx = p->display_ctx; + p->decode_ctx = p->display_ctx; - int decode_dev_idx = -1; - mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice, - &decode_dev_idx); + int decode_dev_idx = -1; + mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice, + &decode_dev_idx); - if (decode_dev_idx > -1) { - CUdevice decode_dev; - ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx)); - if (ret < 0) - goto error; + if (decode_dev_idx > -1) { + CUdevice decode_dev; + ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx)); + if (ret < 0) + goto error; + + if (decode_dev != display_dev) { + MP_INFO(hw, "Using separate decoder and display devices\n"); - if (decode_dev != display_dev) { - MP_INFO(hw, "Using separate decoder and display devices\n"); + // Pop the display context. We won't use it again during init() + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (ret < 0) + return -1; + + ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + decode_dev)); + if (ret < 0) + return -1; + } + } + } else if (p->is_vk) { +#if HAVE_VULKAN + uint8_t vk_uuid[VK_UUID_SIZE]; + struct mpvk_ctx *vk = ra_vk_get(hw->ra); + + mpvk_get_phys_device_uuid(vk, vk_uuid); + + int count; + ret = CHECK_CU(cu->cuDeviceGetCount(&count)); + if (ret < 0) + return -1; - // Pop the display context. We won't use it again during init() - ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + display_dev = -1; + for (int i = 0; i < count; i++) { + CUdevice dev; + ret = CHECK_CU(cu->cuDeviceGet(&dev, i)); if (ret < 0) - goto error; + continue; - ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - decode_dev)); + CUuuid uuid; + ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)); if (ret < 0) - goto error; + continue; + + if (memcmp(vk_uuid, uuid.bytes, VK_UUID_SIZE) == 0) { + display_dev = dev; + break; + } } + + if (display_dev == -1) { + MP_ERR(hw, "Could not match Vulkan display device in CUDA.\n"); + return -1; + } + + ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + display_dev)); + if (ret < 0) + return -1; + + p->decode_ctx = p->display_ctx; +#endif } hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); @@ -197,6 +292,106 @@ static void cuda_uninit(struct ra_hwdec *hw) #undef CHECK_CU #define CHECK_CU(x) check_cu((mapper)->owner, (x), #x) +#if HAVE_VULKAN +static struct ra_buf *cuda_buf_pool_get(struct ra_hwdec_mapper *mapper, int n) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + CudaFunctions *cu = p_owner->cu; + int ret = 0; + + struct ra_buf_pool *pool = &p->buf_pool[n]; + struct ra_buf *buf = ra_buf_pool_get(mapper->ra, pool, &p->buf_params[n]); + if (!buf) { + goto error; + } + + if (!ra_vk_buf_get_user_data(buf)) { + struct ext_buf *ebuf = talloc_zero(NULL, struct ext_buf); + struct vk_external_mem mem_info; + + bool success = ra_vk_buf_get_external_info(mapper->ra, buf, &mem_info); + if (!success) { + ret = -1; + goto error; + } + +#if HAVE_WIN32_DESKTOP + ebuf->handle = mem_info.mem_handle; + MP_DBG(mapper, "vk_external_info[%d][%d]: %p %zu %zu\n", n, pool->index, ebuf->handle, mem_info.size, mem_info.offset); +#else + ebuf->fd = mem_info.mem_fd; + MP_DBG(mapper, "vk_external_info[%d][%d]: %d %zu %zu\n", n, pool->index, ebuf->fd, mem_info.size, mem_info.offset); +#endif + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { +#if HAVE_WIN32_DESKTOP + .type = IsWindows8OrGreater() + ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 + : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, + .handle.win32.handle = ebuf->handle, +#else + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .handle.fd = ebuf->fd, +#endif + .size = mem_info.mem_size, + .flags = 0, + }; + ret = CHECK_CU(cu->cuImportExternalMemory(&ebuf->mem, &ext_desc)); + if (ret < 0) + goto error; + + CUDA_EXTERNAL_MEMORY_BUFFER_DESC buf_desc = { + .offset = mem_info.offset, + .size = mem_info.size, + .flags = 0, + }; + ret = CHECK_CU(cu->cuExternalMemoryGetMappedBuffer(&ebuf->buf, ebuf->mem, &buf_desc)); + if (ret < 0) + goto error; + + ra_vk_buf_set_user_data(buf, ebuf); + } + return buf; + +error: + MP_ERR(mapper, "cuda_buf_pool_get failed\n"); + return NULL; +} + +static void cuda_buf_pool_uninit(struct ra_hwdec_mapper *mapper, int n) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + CudaFunctions *cu = p_owner->cu; + + struct ra_buf_pool *pool = &p->buf_pool[n]; + for (int i = 0; i < pool->num_buffers; i++) { + struct ra_buf *buf = pool->buffers[i]; + struct ext_buf *ebuf = ra_vk_buf_get_user_data(buf); + if (ebuf) { + if (ebuf->mem > 0) { + CHECK_CU(cu->cuDestroyExternalMemory(ebuf->mem)); +#if HAVE_WIN32_DESKTOP + } + if (ebuf->handle) { + // Handle must always be closed by us. + CloseHandle(ebuf->handle); + } +#else + } else if (ebuf->fd > -1) { + // fd should only be closed if external memory was not imported + close(ebuf->fd); + } +#endif + } + talloc_free(ebuf); + ra_vk_buf_set_user_data(buf, NULL); + } + ra_buf_pool_uninit(mapper->ra, pool); +} +#endif // HAVE_VULKAN + static int mapper_init(struct ra_hwdec_mapper *mapper) { struct priv_owner *p_owner = mapper->owner->priv; @@ -243,27 +438,39 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) goto error; } - GLuint texture; - GLenum target; - ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target); + if (p_owner->is_gl) { +#if HAVE_GL + GLuint texture; + GLenum target; + ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target); - ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, - CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); - if (ret < 0) - goto error; + ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); + if (ret < 0) + goto error; - ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0)); - if (ret < 0) - goto error; + ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0)); + if (ret < 0) + goto error; - ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], - 0, 0)); - if (ret < 0) - goto error; + ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], + 0, 0)); + if (ret < 0) + goto error; - ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); - if (ret < 0) - goto error; + ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); + if (ret < 0) + goto error; +#endif + } else if (p_owner->is_vk) { + struct ra_buf_params buf_params = { + .type = RA_BUF_TYPE_SHARED_MEMORY, + .size = mp_image_plane_h(&p->layout, n) * + mp_image_plane_w(&p->layout, n) * + mapper->tex[n]->params.format->pixel_size, + }; + p->buf_params[n] = buf_params; + } } error: @@ -288,6 +495,10 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper) CHECK_CU(cu->cuGraphicsUnregisterResource(p->cu_res[n])); p->cu_res[n] = 0; ra_tex_free(mapper->ra, &mapper->tex[n]); + +#if HAVE_VULKAN + cuda_buf_pool_uninit(mapper, n); +#endif } CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } @@ -303,28 +514,54 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) CudaFunctions *cu = p_owner->cu; CUcontext dummy; int ret = 0, eret = 0; + bool is_gl = p_owner->is_gl; + bool is_vk = p_owner->is_vk; ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); if (ret < 0) return ret; for (int n = 0; n < p->layout.num_planes; n++) { + struct ra_buf *buf = NULL; + CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, - .dstMemoryType = CU_MEMORYTYPE_ARRAY, .srcDevice = (CUdeviceptr)mapper->src->planes[n], .srcPitch = mapper->src->stride[n], .srcY = 0, - .dstArray = p->cu_array[n], .WidthInBytes = mp_image_plane_w(&p->layout, n) * mapper->tex[n]->params.format->pixel_size, .Height = mp_image_plane_h(&p->layout, n), }; + + if (is_gl) { + cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY; + cpy.dstArray = p->cu_array[n]; + } else if (is_vk) { +#if HAVE_VULKAN + buf = cuda_buf_pool_get(mapper, n); + struct ext_buf *ebuf = ra_vk_buf_get_user_data(buf); + + cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.dstDevice = ebuf->buf; + cpy.dstPitch = mp_image_plane_w(&p->layout, n) * + mapper->tex[n]->params.format->pixel_size; +#endif + } + ret = CHECK_CU(cu->cuMemcpy2D(&cpy)); if (ret < 0) goto error; - } + if (is_vk) { + struct ra_tex_upload_params params = { + .tex = mapper->tex[n], + .invalidate = true, + .buf = buf, + }; + mapper->ra->fns->tex_upload(mapper->ra, ¶ms); + } + } error: eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); diff --git a/video/out/opengl/hwdec_drmprime_drm.c b/video/out/opengl/hwdec_drmprime_drm.c index 24753067d6..fd3d383c55 100644 --- a/video/out/opengl/hwdec_drmprime_drm.c +++ b/video/out/opengl/hwdec_drmprime_drm.c @@ -114,14 +114,17 @@ static void disable_video_plane(struct ra_hwdec *hw) if (!p->ctx) return; - // Disabling video plane is needed on some devices when using the - // primary plane for video. Primary buffer can't be active with no - // framebuffer associated. So we need this function to commit it - // right away as mpv will free all framebuffers on playback end. + if (!p->ctx->drmprime_video_plane) + return; + + // Disabling the drmprime video plane is needed on some devices when using + // the primary plane for video. Primary buffer can't be active with no + // framebuffer associated. So we need this function to commit it right away + // as mpv will free all framebuffers on playback end. drmModeAtomicReqPtr request = drmModeAtomicAlloc(); if (request) { - drm_object_set_property(request, p->ctx->video_plane, "FB_ID", 0); |