summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- DOCS/man/options.rst 10
-rw-r--r-- options/options.c 7
-rw-r--r-- options/options.h 2
-rw-r--r-- video/out/opengl/cuda_dynamic.h 3
-rw-r--r-- video/out/opengl/hwdec_cuda.c 56
5 files changed, 66 insertions, 12 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 925c501881..2e781e2272 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4802,6 +4802,16 @@ The following video options are currently all specific to ``--vo=opengl`` and
This option might be silently removed in the future, if ANGLE fixes shader
compilation speed.
+``--cuda-decode-device=<auto|0..>``
+ Choose the GPU device used for decoding when using the ``cuda`` hwdec.
+
+ By default (``auto``), the device that is being used to provide OpenGL
+ output will also be used for decoding (and in the vast majority of cases,
+ only one GPU will be present).
+
+ Note that when using the ``cuda-copy`` hwdec, a different option must be
+ passed: ``--vd-lavc-o=gpu=<0..>``.
+
Miscellaneous
-------------
diff --git a/options/options.c b/options/options.c
index 1540dcbbf4..6664820ae3 100644
--- a/options/options.c
+++ b/options/options.c
@@ -728,6 +728,11 @@ const m_option_t mp_opts[] = {
({"no", -1}, {"auto", 0}, {"windowed", 1}, {"yes", 2})),
#endif
+#if HAVE_CUDA_HWACCEL
+ OPT_CHOICE_OR_INT("cuda-decode-device", cuda_device, 0,
+ 0, INT_MAX, ({"auto", -1})),
+#endif
+
#if HAVE_ENCODING
OPT_SUBSTRUCT("", encode_opts, encode_config, 0),
#endif
@@ -973,6 +978,8 @@ const struct MPOpts mp_default_opts = {
"Performer", "Title", "Track", "icy-title", "service_name",
NULL
},
+
+ .cuda_device = -1,
};
#endif /* MPLAYER_CFG_MPLAYER_H */
diff --git a/options/options.h b/options/options.h
index bfe7d42bcf..851b9c507f 100644
--- a/options/options.h
+++ b/options/options.h
@@ -336,6 +336,8 @@ typedef struct MPOpts {
struct angle_opts *angle_opts;
struct cocoa_opts *cocoa_opts;
struct dvd_opts *dvd_opts;
+
+ int cuda_device;
} MPOpts;
struct dvd_opts {
diff --git a/video/out/opengl/cuda_dynamic.h b/video/out/opengl/cuda_dynamic.h
index e1ffc6e8c1..9d75b31b7a 100644
--- a/video/out/opengl/cuda_dynamic.h
+++ b/video/out/opengl/cuda_dynamic.h
@@ -94,6 +94,7 @@ typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CU
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *pdevice, int ordinal);
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
@@ -110,6 +111,7 @@ typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, C
FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \
FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \
FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \
+ FN(cuDeviceGet, tcuDeviceGet) \
FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \
FN(cuGetErrorName, tcuGetErrorName) \
FN(cuGetErrorString, tcuGetErrorString) \
@@ -130,6 +132,7 @@ CUDA_FNS(CUDA_EXT_DECL)
#define cuCtxPushCurrent mpv_cuCtxPushCurrent_v2
#define cuCtxPopCurrent mpv_cuCtxPopCurrent_v2
#define cuCtxDestroy mpv_cuCtxDestroy_v2
+#define cuDeviceGet mpv_cuDeviceGet
#define cuMemcpy2D mpv_cuMemcpy2D_v2
#define cuGetErrorName mpv_cuGetErrorName
#define cuGetErrorString mpv_cuGetErrorString
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index e64de97fd3..d02826701a 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -34,6 +34,7 @@
#include "formats.h"
#include "hwdec.h"
+#include "options/m_config.h"
#include "video.h"
struct priv {
@@ -44,7 +45,8 @@ struct priv {
CUarray cu_array[4];
int plane_bytes[4];
- CUcontext cuda_ctx;
+ CUcontext display_ctx;
+ CUcontext decode_ctx;
};
static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
@@ -72,8 +74,7 @@ static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
static int cuda_create(struct gl_hwdec *hw)
{
- CUdevice device;
- CUcontext cuda_ctx = NULL;
+ CUdevice display_dev;
AVBufferRef *hw_device_ctx = NULL;
CUcontext dummy;
unsigned int device_count;
@@ -97,16 +98,43 @@ static int cuda_create(struct gl_hwdec *hw)
if (ret < 0)
goto error;
- ret = CHECK_CU(cuGLGetDevices(&device_count, &device, 1,
+ // Allocate display context
+ ret = CHECK_CU(cuGLGetDevices(&device_count, &display_dev, 1,
CU_GL_DEVICE_LIST_ALL));
if (ret < 0)
goto error;
- ret = CHECK_CU(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device));
+ ret = CHECK_CU(cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+ display_dev));
if (ret < 0)
goto error;
- p->cuda_ctx = cuda_ctx;
+ p->decode_ctx = p->display_ctx;
+
+ int decode_dev_idx = -1;
+ mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice,
+ &decode_dev_idx);
+
+ if (decode_dev_idx > -1) {
+ CUdevice decode_dev;
+ ret = CHECK_CU(cuDeviceGet(&decode_dev, decode_dev_idx));
+ if (ret < 0)
+ goto error;
+
+ if (decode_dev != display_dev) {
+ MP_INFO(hw, "Using separate decoder and display devices\n");
+
+ // Pop the display context. We won't use it again during init()
+ ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+ if (ret < 0)
+ goto error;
+
+ ret = CHECK_CU(cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+ decode_dev));
+ if (ret < 0)
+ goto error;
+ }
+ }
hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
if (!hw_device_ctx)
@@ -115,7 +143,7 @@ static int cuda_create(struct gl_hwdec *hw)
AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
- device_hwctx->cuda_ctx = cuda_ctx;
+ device_hwctx->cuda_ctx = p->decode_ctx;
ret = av_hwdevice_ctx_init(hw_device_ctx);
if (ret < 0) {
@@ -129,7 +157,7 @@ static int cuda_create(struct gl_hwdec *hw)
p->hwctx = (struct mp_hwdec_ctx) {
.type = HWDEC_CUDA,
- .ctx = cuda_ctx,
+ .ctx = p->decode_ctx,
.av_device_ref = hw_device_ctx,
};
p->hwctx.driver_name = hw->driver->name;
@@ -162,7 +190,7 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
return -1;
}
- ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
+ ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
if (ret < 0)
return ret;
@@ -219,7 +247,7 @@ static void destroy(struct gl_hwdec *hw)
CUcontext dummy;
// Don't bail if any CUDA calls fail. This is all best effort.
- CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
+ CHECK_CU(cuCtxPushCurrent(p->display_ctx));
for (int n = 0; n < 4; n++) {
if (p->cu_res[n] > 0)
CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n]));
@@ -227,7 +255,11 @@ static void destroy(struct gl_hwdec *hw)
}
CHECK_CU(cuCtxPopCurrent(&dummy));
- CHECK_CU(cuCtxDestroy(p->cuda_ctx));
+ if (p->decode_ctx != p->display_ctx) {
+ CHECK_CU(cuCtxDestroy(p->decode_ctx));
+ }
+
+ CHECK_CU(cuCtxDestroy(p->display_ctx));
gl->DeleteTextures(4, p->gl_textures);
@@ -242,7 +274,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
CUcontext dummy;
int ret = 0, eret = 0;
- ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
+ ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
if (ret < 0)
return ret;