From f5e82d5ed345dbb894ff75591abc4b262b65d0dd Mon Sep 17 00:00:00 2001
From: Philip Langdale
Date: Sat, 8 Oct 2016 16:51:15 -0700
Subject: vo_opengl: hwdec_cuda: Use dynamic loading for cuda functions

This change applies the pattern used in ffmpeg to dynamically load
cuda, to avoid requiring the CUDA SDK at build time.
---
 video/decode/cuda.c             |  30 ++-------
 video/hwdec.h                   |   1 +
 video/out/opengl/cuda_dynamic.c |  63 ++++++++++++++++++
 video/out/opengl/cuda_dynamic.h | 139 ++++++++++++++++++++++++++++++++++++++++
 video/out/opengl/hwdec_cuda.c   |  11 +++-
 5 files changed, 218 insertions(+), 26 deletions(-)
 create mode 100644 video/out/opengl/cuda_dynamic.c
 create mode 100644 video/out/opengl/cuda_dynamic.h

(limited to 'video')

diff --git a/video/decode/cuda.c b/video/decode/cuda.c
index b606315906..cad02b2353 100644
--- a/video/decode/cuda.c
+++ b/video/decode/cuda.c
@@ -17,6 +17,10 @@
  * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
  */
 
+// This define and typedef prevent hwcontext_cuda.h trying to include cuda.h
+#define CUDA_VERSION 7050
+typedef void * CUcontext;
+
 #include <libavutil/hwcontext.h>
 #include <libavutil/hwcontext_cuda.h>
 
@@ -24,16 +28,6 @@
 #include "video/fmt-conversion.h"
 #include "video/decode/lavc.h"
 
-typedef struct CUVIDContext {
-    CUcontext cuda_ctx;
-} CUVIDContext;
-
-static void cuvid_ctx_free(AVHWDeviceContext *ctx)
-{
-    AVCUDADeviceContext *hwctx = ctx->hwctx;
-    cuCtxDestroy(hwctx->cuda_ctx);
-}
-
 static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
                  const char *codec)
 {
@@ -44,12 +38,7 @@ static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
 
 static int init(struct lavc_ctx *ctx)
 {
-    struct CUVIDContext *p = talloc_ptrtype(NULL, p);
-
-    *p = (struct CUVIDContext) {
-        .cuda_ctx = hwdec_devices_get(ctx->hwdec_devs, HWDEC_CUDA)->ctx,
-    };
-    ctx->hwdec_priv = p;
+    ctx->hwdec_priv = hwdec_devices_get(ctx->hwdec_devs, HWDEC_CUDA)->ctx;
     return 0;
 }
 
@@ -59,7 +48,6 @@ static int init_decoder(struct lavc_ctx *ctx, int w, int h)
     AVCUDADeviceContext *device_hwctx;
     AVHWDeviceContext *device_ctx;
     AVHWFramesContext *hwframe_ctx;
-    CUVIDContext *priv = ctx->hwdec_priv;
     int ret = 0;
 
     if (avctx->hw_frames_ctx) {
@@ -74,10 +62,9 @@ static int init_decoder(struct lavc_ctx *ctx, int w, int h)
     }
 
     device_ctx = (AVHWDeviceContext*)hw_device_ctx->data;
-    device_ctx->free = cuvid_ctx_free;
 
     device_hwctx = device_ctx->hwctx;
-    device_hwctx->cuda_ctx = priv->cuda_ctx;
+    device_hwctx->cuda_ctx = ctx->hwdec_priv;
 
     ret = av_hwdevice_ctx_init(hw_device_ctx);
     if (ret < 0) {
@@ -104,11 +91,6 @@ static int init_decoder(struct lavc_ctx *ctx, int w, int h)
 
 static void uninit(struct lavc_ctx *ctx)
 {
-    struct CUVIDContext *p = ctx->hwdec_priv;
-    if (!p)
-        return;
-
-    talloc_free(p);
     ctx->hwdec_priv = NULL;
 }
 
diff --git a/video/hwdec.h b/video/hwdec.h
index 857d07c894..f2fa7943af 100644
--- a/video/hwdec.h
+++ b/video/hwdec.h
@@ -44,6 +44,7 @@ struct mp_hwdec_ctx {
     // HWDEC_D3D11VA: ID3D11Device*
     // HWDEC_DXVA2: IDirect3DDevice9*
     // HWDEC_DXVA2_COPY: IDirect3DDevice9*
+    // HWDEC_CUDA: CUcontext
     void *ctx;
 
     // Optional.
diff --git a/video/out/opengl/cuda_dynamic.c b/video/out/opengl/cuda_dynamic.c
new file mode 100644
index 0000000000..112e81a521
--- /dev/null
+++ b/video/out/opengl/cuda_dynamic.c
@@ -0,0 +1,63 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cuda_dynamic.h"
+
+#include <pthread.h>
+
+#if defined(_WIN32)
+# include <windows.h>
+# define dlopen(filename, flags) LoadLibrary(TEXT(filename))
+# define dlsym(handle, symbol) GetProcAddress(handle, symbol)
+# define dlclose(handle) FreeLibrary(handle)
+#else
+# include <dlfcn.h>
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+# define CUDA_LIBNAME "nvcuda.dll"
+#else
+# define CUDA_LIBNAME "libcuda.so.1"
+#endif
+
+// Storage for the function pointers that cuda_dynamic.h declares extern.
+#define CUDA_DECL(NAME, TYPE) TYPE *NAME;
+CUDA_FNS(CUDA_DECL)
+
+static bool cuda_loaded = false;
+static pthread_once_t cuda_load_once = PTHREAD_ONCE_INIT;
+
+// pthread_once() target: sets cuda_loaded only if every CUDA_FNS symbol resolves.
+static void cuda_do_load(void)
+{
+    void *lib = dlopen(CUDA_LIBNAME, RTLD_LAZY);
+    if (!lib) {
+        return;
+    }
+
+#define CUDA_LOAD_SYMBOL(NAME, TYPE) NAME = dlsym(lib, #NAME); if (!NAME) return;
+
+    CUDA_FNS(CUDA_LOAD_SYMBOL)
+
+    cuda_loaded = true;
+}
+
+bool cuda_load(void)
+{
+    pthread_once(&cuda_load_once, cuda_do_load);
+    return cuda_loaded;
+}
diff --git a/video/out/opengl/cuda_dynamic.h b/video/out/opengl/cuda_dynamic.h
new file mode 100644
index 0000000000..d906b6787f
--- /dev/null
+++ b/video/out/opengl/cuda_dynamic.h
@@ -0,0 +1,139 @@
+/*
+ * This file is part of mpv.
+ *
+ * It is based on an equivalent file in ffmpeg that was
+ * constructed from documentation, rather than from any
+ * original cuda headers.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined(MPV_CUDA_DYNAMIC_H) && !defined(CUDA_VERSION)
+#define MPV_CUDA_DYNAMIC_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#define CUDA_VERSION 7050
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+#define CU_CTX_SCHED_BLOCKING_SYNC 4
+
+typedef int CUdevice;
+
+typedef struct CUarray_st *CUarray;
+typedef struct CUgraphicsResource_st *CUgraphicsResource;
+typedef struct CUstream_st *CUstream;
+
+typedef void* CUcontext;
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+
+typedef enum CUmemorytype_enum {
+    CU_MEMORYTYPE_HOST = 1,
+    CU_MEMORYTYPE_DEVICE = 2,
+    CU_MEMORYTYPE_ARRAY = 3
+} CUmemorytype;
+
+typedef struct CUDA_MEMCPY2D_st {
+    size_t srcXInBytes;
+    size_t srcY;
+    CUmemorytype srcMemoryType;
+    const void *srcHost;
+    CUdeviceptr srcDevice;
+    CUarray srcArray;
+    size_t srcPitch;
+
+    size_t dstXInBytes;
+    size_t dstY;
+    CUmemorytype dstMemoryType;
+    void *dstHost;
+    CUdeviceptr dstDevice;
+    CUarray dstArray;
+    size_t dstPitch;
+
+    size_t WidthInBytes;
+    size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef enum CUGLDeviceList_enum {
+    CU_GL_DEVICE_LIST_ALL = 1,
+    CU_GL_DEVICE_LIST_CURRENT_FRAME = 2,
+    CU_GL_DEVICE_LIST_NEXT_FRAME = 3,
+} CUGLDeviceList;
+
+typedef unsigned int GLenum;
+typedef unsigned int GLuint;
+
+#define CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD 2
+
+typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
+typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
+typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
+
+// X-macro: FN(symbol, type) exactly once per driver entry point resolved by cuda_load().
+#define CUDA_FNS(FN) \
+    FN(cuInit, tcuInit) \
+    FN(cuCtxCreate_v2, tcuCtxCreate_v2) \
+    FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \
+    FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \
+    FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \
+    FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \
+    FN(cuGetErrorName, tcuGetErrorName) \
+    FN(cuGetErrorString, tcuGetErrorString) \
+    FN(cuGLGetDevices_v2, tcuGLGetDevices_v2) \
+    FN(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage) \
+    FN(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource) \
+    FN(cuGraphicsMapResources, tcuGraphicsMapResources) \
+    FN(cuGraphicsUnmapResources, tcuGraphicsUnmapResources) \
+    FN(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray) \
+
+#define CUDA_EXT_DECL(NAME, TYPE) extern TYPE *NAME;
+
+CUDA_FNS(CUDA_EXT_DECL)
+
+#define cuCtxCreate cuCtxCreate_v2
+#define cuCtxPushCurrent cuCtxPushCurrent_v2
+#define cuCtxPopCurrent cuCtxPopCurrent_v2
+#define cuCtxDestroy cuCtxDestroy_v2
+#define cuMemcpy2D cuMemcpy2D_v2
+#define cuGLGetDevices cuGLGetDevices_v2
+
+// Returns true only if every CUDA_FNS pointer was resolved from the driver library.
+bool
cuda_load(void);
+
+#endif // MPV_CUDA_DYNAMIC_H
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index 4dc842706c..266714a972 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -28,13 +28,13 @@
  */
 
 #include
-#include
+#include "cuda_dynamic.h"
 
 #include "video/mp_image_pool.h"
 #include "hwdec.h"
 #include "video.h"
 
-#include
+#include
 
 struct priv {
     struct mp_hwdec_ctx hwctx;
@@ -152,6 +152,11 @@ static int cuda_create(struct gl_hwdec *hw)
     struct priv *p = talloc_zero(hw, struct priv);
     hw->priv = p;
 
+    if (!cuda_load()) {
+        MP_ERR(hw, "Failed to load CUDA symbols\n");
+        return -1; // never call through unresolved cu* pointers
+    }
+
     ret = CHECK_CU(cuInit(0));
     if (ret < 0)
         goto error;
@@ -277,6 +282,8 @@ static void destroy(struct gl_hwdec *hw)
     }
     CHECK_CU(cuCtxPopCurrent(&dummy));
 
+    CHECK_CU(cuCtxDestroy(p->cuda_ctx));
+
     gl->DeleteTextures(2, p->gl_textures);
 
     hwdec_devices_remove(hw->devs, &p->hwctx);
-- 
cgit v1.2.3