36 files changed, 614 insertions, 373 deletions
diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift
index 9f32ed651f..b2910dba85 100644
--- a/video/out/cocoa_cb_common.swift
+++ b/video/out/cocoa_cb_common.swift
@@ -87,8 +87,7 @@ class CocoaCB: Common, EventSubscriber {
     }
 
     func updateWindowSize(_ vo: UnsafeMutablePointer<vo>) {
-        guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else
-        {
+        guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else {
             log.warning("Couldn't update Window size, no Screen available")
             return
         }
@@ -102,11 +101,10 @@ class CocoaCB: Common, EventSubscriber {
     }
 
     override func displayLinkCallback(_ displayLink: CVDisplayLink,
-                                            _ inNow: UnsafePointer<CVTimeStamp>,
-                                     _ inOutputTime: UnsafePointer<CVTimeStamp>,
-                                          _ flagsIn: CVOptionFlags,
-                                         _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn
-    {
+                                      _ inNow: UnsafePointer<CVTimeStamp>,
+                                      _ inOutputTime: UnsafePointer<CVTimeStamp>,
+                                      _ flagsIn: CVOptionFlags,
+                                      _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn {
         libmpv.reportRenderFlip()
         return kCVReturnSuccess
     }
@@ -122,7 +120,55 @@ class CocoaCB: Common, EventSubscriber {
         }
 
         libmpv.setRenderICCProfile(colorSpace)
-        layer?.colorspace = colorSpace.cgColorSpace
+        layer?.colorspace = getColorSpace()
+    }
+
+    func getColorSpace() -> CGColorSpace? {
+        guard let colorSpace = window?.screen?.colorSpace?.cgColorSpace else {
+            log.warning("Couldn't retrieve ICC Profile, no color space available")
+            return nil
+        }
+
+        let outputCsp = Int(option.mac.cocoa_cb_output_csp)
+
+        switch outputCsp {
+        case MAC_CSP_AUTO: return colorSpace
+        case MAC_CSP_DISPLAY_P3: return CGColorSpace(name: CGColorSpace.displayP3)
+        case MAC_CSP_DISPLAY_P3_HLG: return CGColorSpace(name: CGColorSpace.displayP3_HLG)
+        case MAC_CSP_DISPLAY_P3_PQ: return CGColorSpace(name: CGColorSpace.displayP3_PQ)
+        case MAC_CSP_DCI_P3: return CGColorSpace(name: CGColorSpace.dcip3)
+        case MAC_CSP_BT_2020: return CGColorSpace(name: CGColorSpace.itur_2020)
+        case MAC_CSP_BT_709: return CGColorSpace(name: CGColorSpace.itur_709)
+        case MAC_CSP_SRGB: return CGColorSpace(name: CGColorSpace.sRGB)
+        case MAC_CSP_SRGB_LINEAR: return CGColorSpace(name: CGColorSpace.linearSRGB)
+        case MAC_CSP_RGB_LINEAR: return CGColorSpace(name: CGColorSpace.genericRGBLinear)
+        case MAC_CSP_ADOBE: return CGColorSpace(name: CGColorSpace.adobeRGB1998)
+        default: break
+        }
+
+#if HAVE_MACOS_11_FEATURES
+        if #available(macOS 11.0, *) {
+            switch outputCsp {
+            case MAC_CSP_BT_2100_HLG: return CGColorSpace(name: CGColorSpace.itur_2100_HLG)
+            case MAC_CSP_BT_2100_PQ: return CGColorSpace(name: CGColorSpace.itur_2100_PQ)
+            default: break
+            }
+        }
+#endif
+
+#if HAVE_MACOS_12_FEATURES
+        if #available(macOS 12.0, *) {
+            switch outputCsp {
+            case MAC_CSP_DISPLAY_P3_LINEAR: return CGColorSpace(name: CGColorSpace.linearDisplayP3)
+            case MAC_CSP_BT_2020_LINEAR: return CGColorSpace(name: CGColorSpace.linearITUR_2020)
+            default: break
+            }
+        }
+#endif
+
+        log.warning("Couldn't retrieve configured color space, falling back to auto")
+
+        return colorSpace
     }
 
     override func windowDidEndAnimation() {
@@ -178,10 +224,9 @@ class CocoaCB: Common, EventSubscriber {
     }
 
     override func control(_ vo: UnsafeMutablePointer<vo>,
-                    events: UnsafeMutablePointer<Int32>,
-                    request: UInt32,
-                    data: UnsafeMutableRawPointer?) -> Int32
-    {
+                          events: UnsafeMutablePointer<Int32>,
+                          request: UInt32,
+                          data: UnsafeMutableRawPointer?) -> Int32 {
         switch mp_voctrl(request) {
         case VOCTRL_PREINIT:
             DispatchQueue.main.sync { self.preinit(vo) }
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
index 84fd004adc..4438b2083c 100644
--- a/video/out/d3d11/ra_d3d11.c
+++ b/video/out/d3d11/ra_d3d11.c
@@ -13,6 +13,7 @@
 #include "osdep/windows_utils.h"
 #include "video/out/gpu/spirv.h"
 #include "video/out/gpu/utils.h"
+#include "video/out/gpu/d3d11_helpers.h"
 
 #include "ra_d3d11.h"
 
@@ -43,9 +44,11 @@ struct ra_d3d11 {
 
     struct dll_version d3d_compiler_ver;
 
+#if HAVE_DXGI_DEBUG
     // Debug interfaces (--gpu-debug)
-    ID3D11Debug *debug;
-    ID3D11InfoQueue *iqueue;
+    IDXGIDebug *debug;
+    IDXGIInfoQueue *iqueue;
+#endif
 
     // Device capabilities
     D3D_FEATURE_LEVEL fl;
@@ -2094,24 +2097,25 @@ static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer)
     return timer->result;
 }
 
-static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev)
+#if HAVE_DXGI_DEBUG
+static int map_msg_severity(DXGI_INFO_QUEUE_MESSAGE_SEVERITY sev)
 {
     switch (sev) {
-    case D3D11_MESSAGE_SEVERITY_CORRUPTION:
+    case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION:
         return MSGL_FATAL;
-    case D3D11_MESSAGE_SEVERITY_ERROR:
+    case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR:
         return MSGL_ERR;
-    case D3D11_MESSAGE_SEVERITY_WARNING:
+    case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING:
         return MSGL_WARN;
     default:
-    case D3D11_MESSAGE_SEVERITY_INFO:
-    case D3D11_MESSAGE_SEVERITY_MESSAGE:
+    case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO:
+    case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE:
         return MSGL_DEBUG;
     }
 }
 
 static int map_msg_severity_by_id(D3D11_MESSAGE_ID id,
-                                  D3D11_MESSAGE_SEVERITY sev)
+                                  DXGI_INFO_QUEUE_MESSAGE_SEVERITY sev)
 {
     switch (id) {
     // These are normal. The RA timer queue habitually reuses timer objects
@@ -2168,9 +2172,11 @@ static int map_msg_severity_by_id(D3D11_MESSAGE_ID id,
         return map_msg_severity(sev);
     }
 }
+#endif
 
 static void debug_marker(struct ra *ra, const char *msg)
 {
+#if HAVE_DXGI_DEBUG
     struct ra_d3d11 *p = ra->priv;
     void *talloc_ctx = talloc_new(NULL);
     HRESULT hr;
@@ -2180,33 +2186,38 @@ static void debug_marker(struct ra *ra, const char *msg)
 
     // Copy debug-layer messages to mpv's log output
     bool printed_header = false;
-    uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue);
+    uint64_t messages = IDXGIInfoQueue_GetNumStoredMessages(p->iqueue,
+                                                            DXGI_DEBUG_ALL);
     for (uint64_t i = 0; i < messages; i++) {
         SIZE_T len;
-        hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len);
+        hr = IDXGIInfoQueue_GetMessage(p->iqueue, DXGI_DEBUG_ALL, i, NULL, &len);
         if (FAILED(hr) || !len)
             goto done;
 
-        D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len);
-        hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len);
+        DXGI_INFO_QUEUE_MESSAGE *dxgimsg = talloc_size(talloc_ctx, len);
+        hr = IDXGIInfoQueue_GetMessage(p->iqueue, DXGI_DEBUG_ALL, i, dxgimsg, &len);
         if (FAILED(hr))
             goto done;
 
-        int msgl = map_msg_severity_by_id(d3dmsg->ID, d3dmsg->Severity);
+        int msgl = IsEqualGUID(&dxgimsg->Producer, &DXGI_DEBUG_D3D11)
+                        ? map_msg_severity_by_id(dxgimsg->ID, dxgimsg->Severity)
+                        : map_msg_severity(dxgimsg->Severity);
+
         if (mp_msg_test(ra->log, msgl)) {
             if (!printed_header)
                 MP_INFO(ra, "%s:\n", msg);
             printed_header = true;
 
-            MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID,
-                (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription);
-            talloc_free(d3dmsg);
+            MP_MSG(ra, msgl, "%d: %.*s\n", (int)dxgimsg->ID,
+                (int)dxgimsg->DescriptionByteLength, dxgimsg->pDescription);
+            talloc_free(dxgimsg);
         }
     }
 
-    ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+    IDXGIInfoQueue_ClearStoredMessages(p->iqueue, DXGI_DEBUG_ALL);
 done:
     talloc_free(talloc_ctx);
+#endif
 }
 
 static void destroy(struct ra *ra)
@@ -2237,16 +2248,18 @@ static void destroy(struct ra *ra)
     }
     SAFE_RELEASE(p->ctx);
 
+#if HAVE_DXGI_DEBUG
     if (p->debug) {
         // Report any leaked objects
         debug_marker(ra, "after destroy");
-        ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL);
+        IDXGIDebug_ReportLiveObjects(p->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_DETAIL);
         debug_marker(ra, "after leak check");
-        ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY);
+        IDXGIDebug_ReportLiveObjects(p->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_SUMMARY);
         debug_marker(ra, "after leak summary");
     }
     SAFE_RELEASE(p->debug);
     SAFE_RELEASE(p->iqueue);
+#endif
 
     talloc_free(ra);
 }
@@ -2280,34 +2293,6 @@ void ra_d3d11_flush(struct ra *ra)
     ID3D11DeviceContext_Flush(p->ctx);
 }
 
-static void init_debug_layer(struct ra *ra)
-{
-    struct ra_d3d11 *p = ra->priv;
-    HRESULT hr;
-
-    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug,
-                                     (void**)&p->debug);
-    if (FAILED(hr)) {
-        MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
-        return;
-    }
-
-    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue,
-                                     (void**)&p->iqueue);
-    if (FAILED(hr)) {
-        MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
-        return;
-    }
-
-    // Store an unlimited amount of messages in the buffer. This is fine
-    // because we flush stored messages regularly (in debug_marker.)
-    ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1);
-
-    // Push empty filter to get everything
-    D3D11_INFO_QUEUE_FILTER filter = {0};
-    ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter);
-}
-
 static struct dll_version get_dll_version(HMODULE dll)
 {
     void *ctx = talloc_new(NULL);
@@ -2466,8 +2451,10 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
         p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT;
     }
 
+#if HAVE_DXGI_DEBUG
     if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG)
-        init_debug_layer(ra);
+        mp_d3d11_get_debug_interfaces(ra->log, &p->debug, &p->iqueue);
+#endif
 
     // Some level 9_x devices don't have timestamp queries
     hr = ID3D11Device_CreateQuery(p->dev,
@@ -2481,9 +2468,11 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
     // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx
     find_max_texture_dimension(ra);
 
+#if HAVE_DXGI_DEBUG
     // Ignore any messages during find_max_texture_dimension
     if (p->iqueue)
-        ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+        IDXGIInfoQueue_ClearStoredMessages(p->iqueue, DXGI_DEBUG_ALL);
+#endif
 
     MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh,
                ra->max_texture_wh);
diff --git a/video/out/drm_atomic.c b/video/out/drm_atomic.c
index 5754504e98..c1a15fa75f 100644
--- a/video/out/drm_atomic.c
+++ b/video/out/drm_atomic.c
@@ -43,7 +43,7 @@ int drm_object_create_properties(struct mp_log *log, int fd,
 
     return 0;
 
-  fail:
+fail:
     drm_object_free_properties(object);
     return -1;
 }
@@ -68,43 +68,43 @@ void drm_object_free_properties(struct drm_object *object)
 
 int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value)
 {
-   for (int i = 0; i < object->props->count_props; i++) {
-       if (strcasecmp(name, object->props_info[i]->name) == 0) {
-           *value = object->props->prop_values[i];
-           return 0;
-       }
-   }
-
-   return -EINVAL;
+    for (int i = 0; i < object->props->count_props; i++) {
+        if (strcasecmp(name, object->props_info[i]->name) == 0) {
+            *value = object->props->prop_values[i];
+            return 0;
+        }
+    }
+
+    return -EINVAL;
 }
 
 drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name)
 {
-   uint64_t blob_id;
+    uint64_t blob_id;
 
-   if (!drm_object_get_property(object, name, &blob_id)) {
-       return drmModeGetPropertyBlob(object->fd, blob_id);
-   }
+    if (!drm_object_get_property(object, name, &blob_id)) {
+        return drmModeGetPropertyBlob(object->fd, blob_id);
+    }
 
-   return NULL;
+    return NULL;
 }
 
 int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object,
                             char *name, uint64_t value)
 {
-   for (int i = 0; i < object->props->count_props; i++) {
-       if (strcasecmp(name, object->props_info[i]->name) == 0) {
-           if (object->props_info[i]->flags & DRM_MODE_PROP_IMMUTABLE) {
-               /* Do not try to set immutable values, as this might cause the
-                * atomic commit operation to fail. */
-               return -EINVAL;
-           }
-           return drmModeAtomicAddProperty(request, object->id,
-                                           object->props_info[i]->prop_id, value);
-       }
-   }
-
-   return -EINVAL;
+    for (int i = 0; i < object->props->count_props; i++) {
+        if (strcasecmp(name, object->props_info[i]->name) == 0) {
+            if (object->props_info[i]->flags & DRM_MODE_PROP_IMMUTABLE) {
+                /* Do not try to set immutable values, as this might cause the
+                 * atomic commit operation to fail. */
+                return -EINVAL;
+            }
+            return drmModeAtomicAddProperty(request, object->id,
+                                            object->props_info[i]->prop_id, value);
+        }
+    }
+
+    return -EINVAL;
 }
 
 struct drm_object *drm_object_create(struct mp_log *log, int fd,
diff --git a/video/out/drm_common.c b/video/out/drm_common.c
index e47de7df86..0f65a8426a 100644
--- a/video/out/drm_common.c
+++ b/video/out/drm_common.c
@@ -583,7 +583,7 @@ static bool setup_crtc(struct vo_drm_state *drm, const drmModeRes *res)
            drm->connector->connector_id);
     return false;
 
-  success:
+success:
     MP_VERBOSE(drm, "Selected Encoder %u with CRTC %u\n",
                drm->encoder->encoder_id, drm->crtc_id);
     return true;
diff --git a/video/out/drm_prime.c b/video/out/drm_prime.c
index 9335fa8e02..581eb717bb 100644
--- a/video/out/drm_prime.c
+++ b/video/out/drm_prime.c
@@ -86,13 +86,13 @@ int drm_prime_create_framebuffer(struct mp_log *log, int fd,
         for (int plane = 0; plane < AV_DRM_MAX_PLANES; plane++) {
             drm_prime_add_handle_ref(handle_refs, framebuffer->gem_handles[plane]);
         }
-   }
+    }
 
-   return 0;
+    return 0;
 
 fail:
-   memset(framebuffer, 0, sizeof(*framebuffer));
-   return -1;
+    memset(framebuffer, 0, sizeof(*framebuffer));
+    return -1;
 }
 
 void drm_prime_destroy_framebuffer(struct mp_log *log, int fd,
@@ -129,9 +129,11 @@ void drm_prime_add_handle_ref(struct drm_prime_handle_refs *handle_refs,
 {
     if (handle) {
         if (handle > handle_refs->size) {
-            handle_refs->size = handle;
             MP_TARRAY_GROW(handle_refs->ctx, handle_refs->handle_ref_count,
-                           handle_refs->size);
+                           handle - 1);
+            uint32_t *p = handle_refs->handle_ref_count;
+            memset(&p[handle_refs->size], 0, (handle - handle_refs->size) * sizeof(p[0]));
+            handle_refs->size = handle;
         }
         handle_refs->handle_ref_count[handle - 1]++;
     }
diff --git a/video/out/gpu/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c
index d45c038553..fa37d5ef30 100644
--- a/video/out/gpu/d3d11_helpers.c
+++ b/video/out/gpu/d3d11_helpers.c
@@ -32,14 +32,17 @@
 // Windows 8 enum value, not present in mingw-w64 headers
 #define DXGI_ADAPTER_FLAG_SOFTWARE (2)
 typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory);
+typedef HRESULT(WINAPI *PFN_DXGI_GET_DEBUG_INTERFACE)(REFIID riid, void **ppDebug);
 
 static mp_once d3d11_once = MP_STATIC_ONCE_INITIALIZER;
 static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL;
 static PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory1 = NULL;
+static PFN_DXGI_GET_DEBUG_INTERFACE pDXGIGetDebugInterface = NULL;
 static void d3d11_load(void)
 {
     HMODULE d3d11   = LoadLibraryW(L"d3d11.dll");
     HMODULE dxgilib = LoadLibraryW(L"dxgi.dll");
+    HMODULE dxgidebuglib = LoadLibraryW(L"dxgidebug.dll");
     if (!d3d11 || !dxgilib)
         return;
 
@@ -47,6 +50,10 @@ static void d3d11_load(void)
         GetProcAddress(d3d11, "D3D11CreateDevice");
     pCreateDXGIFactory1 = (PFN_CREATE_DXGI_FACTORY)
         GetProcAddress(dxgilib, "CreateDXGIFactory1");
+    if (dxgidebuglib) {
+        pDXGIGetDebugInterface = (PFN_DXGI_GET_DEBUG_INTERFACE)
+            GetProcAddress(dxgidebuglib, "DXGIGetDebugInterface");
+    }
 }
 
 static bool load_d3d11_functions(struct mp_log *log)
@@ -995,3 +1002,40 @@ done:
     SAFE_RELEASE(output6);
     return ret;
 }
+
+#if HAVE_DXGI_DEBUG
+// Fetch the DXGI info queue and debug interfaces via DXGIGetDebugInterface.
+// Both outputs are reset to NULL first and stay NULL if that entry point is missing.
+void mp_d3d11_get_debug_interfaces(struct mp_log *log, IDXGIDebug **debug,
+                                   IDXGIInfoQueue **iqueue)
+{
+    load_d3d11_functions(log);
+
+    *iqueue = NULL;
+    *debug = NULL;
+
+    if (!pDXGIGetDebugInterface)
+        return;
+
+    // Failures are logged as errors, not fatal: ra_d3d11_create() carries on without them.
+    HRESULT hr = pDXGIGetDebugInterface(&IID_IDXGIInfoQueue, (void **) iqueue);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
+        return;
+    }
+
+    // Store an unlimited number of messages in the buffer. This is fine
+    // because we flush stored messages regularly (in debug_marker).
+    IDXGIInfoQueue_SetMessageCountLimit(*iqueue, DXGI_DEBUG_D3D11, -1);
+    IDXGIInfoQueue_SetMessageCountLimit(*iqueue, DXGI_DEBUG_DXGI, -1);
+
+    // Push an empty filter to get everything
+    DXGI_INFO_QUEUE_FILTER filter = {0};
+    IDXGIInfoQueue_PushStorageFilter(*iqueue, DXGI_DEBUG_ALL, &filter);
+
+    hr = pDXGIGetDebugInterface(&IID_IDXGIDebug, (void **) debug);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
+    }
+}
+#endif
diff --git a/video/out/gpu/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h
index 6cc6818064..92322b9152 100644
--- a/video/out/gpu/d3d11_helpers.h
+++ b/video/out/gpu/d3d11_helpers.h
@@ -24,6 +24,10 @@
 #include <dxgi1_2.h>
 #include <dxgi1_6.h>
 
+#if HAVE_DXGI_DEBUG
+#include <dxgidebug.h>
+#endif
+
 #include "video/mp_image.h"
 
 #define D3D_FEATURE_LEVEL_12_0 (0xc000)
@@ -35,6 +39,10 @@
 #define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020    ((DXGI_COLOR_SPACE_TYPE)23)
 #define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24)
 
+#if !HAVE_DXGI_DEBUG_D3D11
+DEFINE_GUID(DXGI_DEBUG_D3D11, 0x4b99317b, 0xac39, 0x4aa6, 0xbb, 0xb, 0xba, 0xa0, 0x47, 0x84, 0x79, 0x8f);
+#endif
+
 struct d3d11_device_opts {
     // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG)
     bool debug;
@@ -109,4 +117,9 @@ bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log,
                                struct d3d11_swapchain_opts *opts,
                                IDXGISwapChain **swapchain_out);
 
+#if HAVE_DXGI_DEBUG
+void mp_d3d11_get_debug_interfaces(struct mp_log *log, IDXGIDebug **debug,
+                                   IDXGIInfoQueue **iqueue);
+#endif
+
 #endif
diff --git a/video/out/gpu/lcms.h b/video/out/gpu/lcms.h
index d0b0fe5b89..30a6c54e83 100644
--- a/video/out/gpu/lcms.h
+++ b/video/out/gpu/lcms.h
@@ -44,6 +44,8 @@ bool gl_lcms_has_changed(struct gl_lcms *p, enum pl_color_primaries prim,
 
 static inline bool gl_parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3)
 {
+    if (!arg)
+        return false;
     if (!strcmp(arg, "auto")) {
         *p1 = *p2 = *p3 = 0;
         return true;
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 1478ec4687..4d8e197229 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -965,8 +965,8 @@ static void init_video(struct gl_video *p)
 
 static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
 {
-   for (int i = 0; i < p->num_dr_buffers; i++) {
-       struct dr_buffer *buffer = &p->dr_buffers[i];
+    for (int i = 0; i < p->num_dr_buffers; i++) {
+        struct dr_buffer *buffer = &p->dr_buffers[i];
         uint8_t *bufptr = buffer->buf->data;
         size_t size = buffer->buf->params.size;
         if (ptr >= bufptr && ptr < bufptr + size)
diff --git a/video/out/hwdec/dmabuf_interop_wl.c b/video/out/hwdec/dmabuf_interop_wl.c
index 606a0aa601..78baf146f0 100644
--- a/video/out/hwdec/dmabuf_interop_wl.c
+++ b/video/out/hwdec/dmabuf_interop_wl.c
@@ -46,7 +46,7 @@ static bool map(struct ra_hwdec_mapper *mapper,
         return false;
     } else if (!ra_compatible_format(mapper->ra, drm_format,
         mapper_p->desc.objects[0].format_modifier)) {
-        MP_VERBOSE(mapper, "Mapped surface with format %s; drm format '%s(%016lx)' "
+        MP_VERBOSE(mapper, "Mapped surface with format %s; drm format '%s(%016" PRIx64 ")' "
                    "is not supported by compositor.\n",
                    mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
                    mp_tag_str(drm_format),
@@ -54,7 +54,7 @@ static bool map(struct ra_hwdec_mapper *mapper,
         return false;
     }
 
-    MP_VERBOSE(mapper, "Supported Wayland display format %s: '%s(%016lx)'\n",
+    MP_VERBOSE(mapper, "Supported Wayland display format %s: '%s(%016" PRIx64 ")'\n",
                mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
                mp_tag_str(drm_format), mapper_p->desc.objects[0].format_modifier);
 
diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c
index 57e4fb40e3..8987cf3407 100644
--- a/video/out/hwdec/hwdec_cuda.c
+++ b/video/out/hwdec/hwdec_cuda.c
@@ -57,12 +57,12 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
 
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
-static const cuda_interop_init interop_inits[] = {
+static const struct cuda_interop_fn *interop_fns[] = {
 #if HAVE_GL
-    cuda_gl_init,
+    &cuda_gl_fn,
 #endif
 #if HAVE_VULKAN
-    cuda_vk_init,
+    &cuda_vk_fn,
 #endif
     NULL
 };
@@ -73,25 +73,36 @@ static int cuda_init(struct ra_hwdec *hw)
     CUcontext dummy;
     int ret = 0;
     struct cuda_hw_priv *p = hw->priv;
-    CudaFunctions *cu;
+    CudaFunctions *cu = NULL;
     int level = hw->probing ? MSGL_V : MSGL_ERR;
-
-    ret = cuda_load_functions(&p->cu, NULL);
-    if (ret != 0) {
-        MP_MSG(hw, level, "Failed to load CUDA symbols\n");
-        return -1;
-    }
-    cu = p->cu;
-
-    ret = CHECK_CU(cu->cuInit(0));
-    if (ret < 0)
-        return -1;
+    bool initialized = false;
 
     // Initialise CUDA context from backend.
-    for (int i = 0; interop_inits[i]; i++) {
-        if (interop_inits[i](hw)) {
-            break;
+    // Note that the interop check doesn't require the CUDA backend to be initialized.
+    // This is important because cuInit wakes up the dGPU, even if the CUDA hwdec won't be used.
+    // Checking first lets us decide whether CUDA should be used without waking up the dGPU,
+    // avoiding a few seconds of delay and improving battery life on laptops.
+    for (int i = 0; interop_fns[i]; i++) {
+        if (!interop_fns[i]->check(hw))
+            continue;
+
+        if (!initialized) {
+            ret = cuda_load_functions(&p->cu, NULL);
+            if (ret != 0) {
+                MP_MSG(hw, level, "Failed to load CUDA symbols\n");
+                return -1;
+            }
+
+            cu = p->cu;
+            ret = CHECK_CU(cu->cuInit(0));
+            if (ret < 0)
+                return -1;
+
+            initialized = true;
         }
+
+        if (interop_fns[i]->init(hw))
+            break;
     }
 
     if (!p->ext_init || !p->ext_uninit) {
diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h
index 9c55053d59..6e671b364e 100644
--- a/video/out/hwdec/hwdec_cuda.h
+++ b/video/out/hwdec/hwdec_cuda.h
@@ -50,10 +50,13 @@ struct cuda_mapper_priv {
     void *ext[4];
 };
 
-typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
+struct cuda_interop_fn {
+    bool (*check)(const struct ra_hwdec *hw);
+    bool (*init)(const struct ra_hwdec *hw);
+};
 
-bool cuda_gl_init(const struct ra_hwdec *hw);
+extern struct cuda_interop_fn cuda_gl_fn;
 
-bool cuda_vk_init(const struct ra_hwdec *hw);
+extern struct cuda_interop_fn cuda_vk_fn;
 
 int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func);
diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c
index f20540ed4d..4c2535232f 100644
--- a/video/out/hwdec/hwdec_cuda_gl.c
+++ b/video/out/hwdec/hwdec_cuda_gl.c
@@ -25,7 +25,6 @@
 
 #include <libavutil/hwcontext.h>
 #include <libavutil/hwcontext_cuda.h>
-#include <unistd.h>
 
 #define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
 
@@ -106,22 +105,24 @@ static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
 #undef CHECK_CU
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
-bool cuda_gl_init(const struct ra_hwdec *hw) {
-    int ret = 0;
-    struct cuda_hw_priv *p = hw->priv;
-    CudaFunctions *cu = p->cu;
+static bool cuda_gl_check(const struct ra_hwdec *hw) {
+    if (!ra_is_gl(hw->ra_ctx->ra))
+        return false; // This is not an OpenGL RA.
 
-    if (ra_is_gl(hw->ra_ctx->ra)) {
-        GL *gl = ra_gl_get(hw->ra_ctx->ra);
-        if (gl->version < 210 && gl->es < 300) {
-            MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
-            return false;
-        }
-    } else {
-        // This is not an OpenGL RA.