From 07915b12273a36bc7f104a5f3fc949a407d243dc Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Sun, 15 Apr 2018 09:06:34 -0700 Subject: vo_gpu: hwdec: Use ffnvcodec to load CUDA symbols The CUDA dynamic loader was broken out of ffmpeg into its own repo and package. This gives us an opportunity to re-use it in mpv and remove our custom loader logic. --- video/out/opengl/cuda_dynamic.c | 63 ----------------- video/out/opengl/cuda_dynamic.h | 148 ---------------------------------------- video/out/opengl/hwdec_cuda.c | 78 ++++++++++++--------- wscript | 6 +- wscript_build.py | 1 - 5 files changed, 50 insertions(+), 246 deletions(-) delete mode 100644 video/out/opengl/cuda_dynamic.c delete mode 100644 video/out/opengl/cuda_dynamic.h diff --git a/video/out/opengl/cuda_dynamic.c b/video/out/opengl/cuda_dynamic.c deleted file mode 100644 index 1135a1f077..0000000000 --- a/video/out/opengl/cuda_dynamic.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "cuda_dynamic.h" - -#include <pthread.h> - -#if defined(_WIN32) -# include <windows.h> -# define dlopen(filename, flags) LoadLibrary(TEXT(filename)) -# define dlsym(handle, symbol) (void *)GetProcAddress(handle, symbol) -# define dlclose(handle) FreeLibrary(handle) -#else -# include <dlfcn.h> -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -# define CUDA_LIBNAME "nvcuda.dll" -#else -# define CUDA_LIBNAME "libcuda.so.1" -#endif - -#define CUDA_DECL(NAME, TYPE) \ - TYPE *mpv_ ## NAME; -CUDA_FNS(CUDA_DECL) - -static bool cuda_loaded = false; -static pthread_once_t cuda_load_once = PTHREAD_ONCE_INIT; - -static void cuda_do_load(void) -{ - void *lib = dlopen(CUDA_LIBNAME, RTLD_LAZY); - if (!lib) { - return; - } - -#define CUDA_LOAD_SYMBOL(NAME, TYPE) \ - mpv_ ## NAME = dlsym(lib, #NAME); if (!mpv_ ## NAME) return; - - CUDA_FNS(CUDA_LOAD_SYMBOL) - - cuda_loaded = true; -} - -bool cuda_load(void) -{ - pthread_once(&cuda_load_once, cuda_do_load); - return cuda_loaded; -} diff --git a/video/out/opengl/cuda_dynamic.h b/video/out/opengl/cuda_dynamic.h deleted file mode 100644 index 9d75b31b7a..0000000000 --- a/video/out/opengl/cuda_dynamic.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * This file is part of mpv. - * - * It is based on an equivalent file in ffmpeg that was - * constructed from documentation, rather than from any - * original cuda headers. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MPV_CUDA_DYNAMIC_H -#define MPV_CUDA_DYNAMIC_H - -#include <stdbool.h> -#include <stddef.h> - -#include "gl_headers.h" - -#define CUDA_VERSION 7050 - -#if defined(_WIN32) || defined(__CYGWIN__) -#define CUDAAPI __stdcall -#else -#define CUDAAPI -#endif - -#define CU_CTX_SCHED_BLOCKING_SYNC 4 - -typedef int CUdevice; - -typedef struct CUarray_st *CUarray; -typedef struct CUgraphicsResource_st *CUgraphicsResource; -typedef struct CUstream_st *CUstream; - -typedef void* CUcontext; -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) -typedef unsigned long long CUdeviceptr; -#else -typedef unsigned int CUdeviceptr; -#endif - -typedef enum cudaError_enum { - CUDA_SUCCESS = 0 -} CUresult; - -typedef enum CUmemorytype_enum { - CU_MEMORYTYPE_HOST = 1, - CU_MEMORYTYPE_DEVICE = 2, - CU_MEMORYTYPE_ARRAY = 3 -} CUmemorytype; - -typedef struct CUDA_MEMCPY2D_st { - size_t srcXInBytes; - size_t srcY; - CUmemorytype srcMemoryType; - const void *srcHost; - CUdeviceptr srcDevice; - CUarray srcArray; - size_t srcPitch; - - size_t dstXInBytes; - size_t dstY; - CUmemorytype dstMemoryType; - void *dstHost; - CUdeviceptr dstDevice; - CUarray dstArray; - size_t dstPitch; - - size_t WidthInBytes; - size_t Height; -} CUDA_MEMCPY2D; - -typedef enum CUGLDeviceList_enum { - CU_GL_DEVICE_LIST_ALL = 1, - CU_GL_DEVICE_LIST_CURRENT_FRAME = 2, - CU_GL_DEVICE_LIST_NEXT_FRAME = 3, -} CUGLDeviceList; - -#define CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD 2 - -typedef CUresult CUDAAPI tcuInit(unsigned int Flags); -typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev); -typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx); -typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx); -typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx); -typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *pdevice, int ordinal); -typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy); -typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** 
pstr); -typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr); -typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList); -typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags); -typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); -typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); - -#define CUDA_FNS(FN) \ - FN(cuInit, tcuInit) \ - FN(cuCtxCreate_v2, tcuCtxCreate_v2) \ - FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \ - FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \ - FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \ - FN(cuDeviceGet, tcuDeviceGet) \ - FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \ - FN(cuGetErrorName, tcuGetErrorName) \ - FN(cuGetErrorString, tcuGetErrorString) \ - FN(cuGLGetDevices_v2, tcuGLGetDevices_v2) \ - FN(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage) \ - FN(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource) \ - FN(cuGraphicsMapResources, tcuGraphicsMapResources) \ - FN(cuGraphicsUnmapResources, tcuGraphicsUnmapResources) \ - FN(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray) \ - -#define CUDA_EXT_DECL(NAME, TYPE) \ - extern TYPE *mpv_ ## NAME; - -CUDA_FNS(CUDA_EXT_DECL) - -#define cuInit mpv_cuInit -#define cuCtxCreate mpv_cuCtxCreate_v2 -#define cuCtxPushCurrent mpv_cuCtxPushCurrent_v2 -#define cuCtxPopCurrent mpv_cuCtxPopCurrent_v2 -#define cuCtxDestroy mpv_cuCtxDestroy_v2 -#define cuDeviceGet mpv_cuDeviceGet -#define cuMemcpy2D 
mpv_cuMemcpy2D_v2 -#define cuGetErrorName mpv_cuGetErrorName -#define cuGetErrorString mpv_cuGetErrorString -#define cuGLGetDevices mpv_cuGLGetDevices_v2 -#define cuGraphicsGLRegisterImage mpv_cuGraphicsGLRegisterImage -#define cuGraphicsUnregisterResource mpv_cuGraphicsUnregisterResource -#define cuGraphicsMapResources mpv_cuGraphicsMapResources -#define cuGraphicsUnmapResources mpv_cuGraphicsUnmapResources -#define cuGraphicsSubResourceGetMappedArray mpv_cuGraphicsSubResourceGetMappedArray - -bool cuda_load(void); - -#endif // MPV_CUDA_DYNAMIC_H diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index 1a7df2020a..f80c14500a 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -27,8 +27,7 @@ * when decoding 10bit streams (there is some hardware dithering going on). */ -#include "cuda_dynamic.h" - +#include <ffnvcodec/dynlink_loader.h> #include <libavutil/hwcontext.h> #include <libavutil/hwcontext_cuda.h> @@ -39,6 +38,7 @@ struct priv_owner { struct mp_hwdec_ctx hwctx; + CudaFunctions *cu; CUcontext display_ctx; CUcontext decode_ctx; }; @@ -56,13 +56,15 @@ static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func) const char *err_name; const char *err_string; + struct priv_owner *p = hw->priv; + MP_TRACE(hw, "Calling %s\n", func); if (err == CUDA_SUCCESS) return 0; - cuGetErrorName(err, &err_name); - cuGetErrorString(err, &err_string); + p->cu->cuGetErrorName(err, &err_name); + p->cu->cuGetErrorString(err, &err_string); MP_ERR(hw, "%s failed", func); if (err_name && err_string) @@ -82,6 +84,7 @@ static int cuda_init(struct ra_hwdec *hw) unsigned int device_count; int ret = 0; struct priv_owner *p = hw->priv; + CudaFunctions *cu; if (!ra_is_gl(hw->ra)) return -1; @@ -92,24 +95,25 @@ static int cuda_init(struct ra_hwdec *hw) return -1; } - bool loaded = cuda_load(); - if (!loaded) { + ret = cuda_load_functions(&p->cu, NULL); + if (ret != 0) { MP_VERBOSE(hw, "Failed to load CUDA symbols\n"); return -1; } + cu = p->cu; - ret = CHECK_CU(cuInit(0)); + ret = CHECK_CU(cu->cuInit(0)); if 
(ret < 0) goto error; // Allocate display context - ret = CHECK_CU(cuGLGetDevices(&device_count, &display_dev, 1, - CU_GL_DEVICE_LIST_ALL)); + ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, + CU_GL_DEVICE_LIST_ALL)); if (ret < 0) goto error; - ret = CHECK_CU(cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - display_dev)); + ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + display_dev)); if (ret < 0) goto error; @@ -121,7 +125,7 @@ static int cuda_init(struct ra_hwdec *hw) if (decode_dev_idx > -1) { CUdevice decode_dev; - ret = CHECK_CU(cuDeviceGet(&decode_dev, decode_dev_idx)); + ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx)); if (ret < 0) goto error; @@ -129,12 +133,12 @@ static int cuda_init(struct ra_hwdec *hw) MP_INFO(hw, "Using separate decoder and display devices\n"); // Pop the display context. We won't use it again during init() - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; - ret = CHECK_CU(cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - decode_dev)); + ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + decode_dev)); if (ret < 0) goto error; } @@ -155,7 +159,7 @@ static int cuda_init(struct ra_hwdec *hw) goto error; } - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; @@ -168,7 +172,7 @@ static int cuda_init(struct ra_hwdec *hw) error: av_buffer_unref(&hw_device_ctx); - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return -1; } @@ -176,15 +180,18 @@ static int cuda_init(struct ra_hwdec *hw) static void cuda_uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; + CudaFunctions *cu = p->cu; hwdec_devices_remove(hw->devs, &p->hwctx); av_buffer_unref(&p->hwctx.av_device_ref); if (p->decode_ctx && p->decode_ctx != p->display_ctx) - CHECK_CU(cuCtxDestroy(p->decode_ctx)); + 
CHECK_CU(cu->cuCtxDestroy(p->decode_ctx)); if (p->display_ctx) - CHECK_CU(cuCtxDestroy(p->display_ctx)); + CHECK_CU(cu->cuCtxDestroy(p->display_ctx)); + + cuda_free_functions(&p->cu); } #undef CHECK_CU @@ -195,6 +202,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) struct priv_owner *p_owner = mapper->owner->priv; struct priv *p = mapper->priv; CUcontext dummy; + CudaFunctions *cu = p_owner->cu; int ret = 0, eret = 0; p->display_ctx = p_owner->display_ctx; @@ -212,7 +220,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) return -1; } - ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); if (ret < 0) return ret; @@ -239,27 +247,27 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) GLenum target; ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target); - ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, - CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); + ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsMapResources(1, &p->cu_res[n], 0)); + ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], - 0, 0)); + ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], + 0, 0)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); + ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); if (ret < 0) goto error; } error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; @@ -269,17 +277,19 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) static void mapper_uninit(struct ra_hwdec_mapper *mapper) { struct priv *p = mapper->priv; + struct priv_owner *p_owner = 
mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; CUcontext dummy; // Don't bail if any CUDA calls fail. This is all best effort. - CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); for (int n = 0; n < 4; n++) { if (p->cu_res[n] > 0) - CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n])); + CHECK_CU(cu->cuGraphicsUnregisterResource(p->cu_res[n])); p->cu_res[n] = 0; ra_tex_free(mapper->ra, &mapper->tex[n]); } - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } static void mapper_unmap(struct ra_hwdec_mapper *mapper) @@ -289,10 +299,12 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) static int mapper_map(struct ra_hwdec_mapper *mapper) { struct priv *p = mapper->priv; + struct priv_owner *p_owner = mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; CUcontext dummy; int ret = 0, eret = 0; - ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); if (ret < 0) return ret; @@ -308,14 +320,14 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) mapper->tex[n]->params.format->pixel_size, .Height = mp_image_plane_h(&p->layout, n), }; - ret = CHECK_CU(cuMemcpy2D(&cpy)); + ret = CHECK_CU(cu->cuMemcpy2D(&cpy)); if (ret < 0) goto error; } error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; diff --git a/wscript b/wscript index f5a19594f1..09cbabc3f3 100644 --- a/wscript +++ b/wscript @@ -838,10 +838,14 @@ hwaccel_features = [ 'deps': 'gl-dxinterop && d3d9-hwaccel', 'groups': [ 'gl' ], 'func': check_true, + }, { + 'name': 'ffnvcodec', + 'desc': 'CUDA Headers and dynamic loader', + 'func': check_pkg_config('ffnvcodec >= 8.1.24.1'), }, { 'name': '--cuda-hwaccel', 'desc': 'CUDA hwaccel', - 'deps': 'gl', + 'deps': 'gl && ffnvcodec', 'func': check_cc(fragment=load_fragment('cuda.c'), use='libavcodec'), } diff --git a/wscript_build.py b/wscript_build.py index 
cd2c9d099f..4d93f2b498 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -461,7 +461,6 @@ def build(ctx): ( "video/out/opengl/context_wayland.c", "gl-wayland" ), ( "video/out/opengl/context_win.c", "gl-win32" ), ( "video/out/opengl/context_x11egl.c", "egl-x11" ), - ( "video/out/opengl/cuda_dynamic.c", "cuda-hwaccel" ), ( "video/out/opengl/egl_helpers.c", "egl-helpers" ), ( "video/out/opengl/formats.c", "gl" ), ( "video/out/opengl/hwdec_cuda.c", "cuda-hwaccel" ), -- cgit v1.2.3