summaryrefslogtreecommitdiffstats
path: root/video/out/hwdec/hwdec_cuda.c
diff options
context:
space:
mode:
authorPhilip Langdale <philipl@overt.org>2019-02-02 15:24:27 -0800
committersfan5 <sfan5@live.de>2019-05-03 18:02:18 +0200
commit23a324215b5aa9ea66a5d19fe7c5322c871eda64 (patch)
tree371e786c66b52d9bdf33f258caf67ae8fa9987a2 /video/out/hwdec/hwdec_cuda.c
parent738fda36774b1e5bc0f7277178ee7d762880922e (diff)
downloadmpv-23a324215b5aa9ea66a5d19fe7c5322c871eda64.tar.bz2
mpv-23a324215b5aa9ea66a5d19fe7c5322c871eda64.tar.xz
vo/gpu: hwdec_cuda: Refactor gpu api specific code into separate files
The amount of code now present that's specific to Vulkan or OpenGL has reached the point where we really want to split it out to avoid a mess of #ifdefs. At the same time, I'm moving the code to an api neutral location.
Diffstat (limited to 'video/out/hwdec/hwdec_cuda.c')
-rw-r--r--video/out/hwdec/hwdec_cuda.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c
new file mode 100644
index 0000000000..78eb936f17
--- /dev/null
+++ b/video/out/hwdec/hwdec_cuda.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2016 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This hwdec implements an optimized output path using CUDA->OpenGL
+ * or CUDA->Vulkan interop for frame data that is stored in CUDA
+ * device memory. Although it is not explicit in the code here, the
+ * only practical way to get data in this form is from the
+ * nvdec/cuvid decoder.
+ */
+
+#include "config.h"
+#include "hwdec_cuda.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_cuda.h>
+
+int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
+{
+ const char *err_name;
+ const char *err_string;
+
+ struct cuda_hw_priv *p = hw->priv;
+
+ MP_TRACE(hw, "Calling %s\n", func);
+
+ if (err == CUDA_SUCCESS)
+ return 0;
+
+ p->cu->cuGetErrorName(err, &err_name);
+ p->cu->cuGetErrorString(err, &err_string);
+
+ MP_ERR(hw, "%s failed", func);
+ if (err_name && err_string)
+ MP_ERR(hw, " -> %s: %s", err_name, err_string);
+ MP_ERR(hw, "\n");
+
+ return -1;
+}
+
+#define CHECK_CU(x) check_cu(hw, (x), #x)
+
+static int cuda_init(struct ra_hwdec *hw)
+{
+ AVBufferRef *hw_device_ctx = NULL;
+ CUcontext dummy;
+ int ret = 0;
+ struct cuda_hw_priv *p = hw->priv;
+ CudaFunctions *cu;
+
+ ret = cuda_load_functions(&p->cu, NULL);
+ if (ret != 0) {
+ MP_VERBOSE(hw, "Failed to load CUDA symbols\n");
+ return -1;
+ }
+ cu = p->cu;
+
+ ret = CHECK_CU(cu->cuInit(0));
+ if (ret < 0)
+ return -1;
+
+ // Initialise CUDA context from backend.
+ // We call all possible inits because these will do nothing if the ra context
+ // doesn't match.
+#if HAVE_GL
+ if (!cuda_gl_init(hw)) {
+ return -1;
+ }
+#endif
+#if HAVE_VULKAN
+ if (!cuda_vk_init(hw)) {
+ return -1;
+ }
+#endif
+
+ if (!p->ext_init || !p->ext_uninit) {
+ MP_VERBOSE(hw, "CUDA hwdec only works with OpenGL or Vulkan backends.\n");
+ return -1;
+ }
+
+ hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
+ if (!hw_device_ctx)
+ goto error;
+
+ AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
+
+ AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
+ device_hwctx->cuda_ctx = p->decode_ctx;
+
+ ret = av_hwdevice_ctx_init(hw_device_ctx);
+ if (ret < 0) {
+ MP_ERR(hw, "av_hwdevice_ctx_init failed\n");
+ goto error;
+ }
+
+ ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ if (ret < 0)
+ goto error;
+
+ p->hwctx = (struct mp_hwdec_ctx) {
+ .driver_name = hw->driver->name,
+ .av_device_ref = hw_device_ctx,
+ };
+ hwdec_devices_add(hw->devs, &p->hwctx);
+ return 0;
+
+ error:
+ av_buffer_unref(&hw_device_ctx);
+ CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+
+ return -1;
+}
+
+static void cuda_uninit(struct ra_hwdec *hw)
+{
+ struct cuda_hw_priv *p = hw->priv;
+ CudaFunctions *cu = p->cu;
+
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ av_buffer_unref(&p->hwctx.av_device_ref);
+
+ if (p->decode_ctx && p->decode_ctx != p->display_ctx)
+ CHECK_CU(cu->cuCtxDestroy(p->decode_ctx));
+
+ if (p->display_ctx)
+ CHECK_CU(cu->cuCtxDestroy(p->display_ctx));
+
+ cuda_free_functions(&p->cu);
+}
+
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+ struct cuda_hw_priv *p_owner = mapper->owner->priv;
+ struct cuda_mapper_priv *p = mapper->priv;
+ CUcontext dummy;
+ CudaFunctions *cu = p_owner->cu;
+ int ret = 0, eret = 0;
+
+ p->display_ctx = p_owner->display_ctx;
+
+ int imgfmt = mapper->src_params.hw_subfmt;
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = imgfmt;
+ mapper->dst_params.hw_subfmt = 0;
+
+ mp_image_set_params(&p->layout, &mapper->dst_params);
+
+ struct ra_imgfmt_desc desc;
+ if (!ra_get_imgfmt_desc(mapper->ra, imgfmt, &desc)) {
+ MP_ERR(mapper, "Unsupported format: %s\n", mp_imgfmt_to_name(imgfmt));
+ return -1;
+ }
+
+ ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+ if (ret < 0)
+ return ret;
+
+ for (int n = 0; n < desc.num_planes; n++) {
+ if (!p_owner->ext_init(mapper, desc.planes[n], n))
+ goto error;
+ }
+
+ error:
+ eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ if (eret < 0)
+ return eret;
+
+ return ret;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+ struct cuda_mapper_priv *p = mapper->priv;
+ struct cuda_hw_priv *p_owner = mapper->owner->priv;
+ CudaFunctions *cu = p_owner->cu;
+ CUcontext dummy;
+
+ // Don't bail if any CUDA calls fail. This is all best effort.
+ CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+ for (int n = 0; n < 4; n++) {
+ p_owner->ext_uninit(mapper, n);
+ ra_tex_free(mapper->ra, &mapper->tex[n]);
+ }
+ CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+ struct cuda_mapper_priv *p = mapper->priv;
+ struct cuda_hw_priv *p_owner = mapper->owner->priv;
+ CudaFunctions *cu = p_owner->cu;
+ CUcontext dummy;
+ int ret = 0, eret = 0;
+
+ ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+ if (ret < 0)
+ return ret;
+
+ for (int n = 0; n < p->layout.num_planes; n++) {
+ if (p_owner->ext_wait) {
+ if (!p_owner->ext_wait(mapper, n))
+ goto error;
+ }
+
+ CUDA_MEMCPY2D cpy = {
+ .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+ .srcDevice = (CUdeviceptr)mapper->src->planes[n],
+ .srcPitch = mapper->src->stride[n],
+ .srcY = 0,
+ .dstMemoryType = CU_MEMORYTYPE_ARRAY,
+ .dstArray = p->cu_array[n],
+ .WidthInBytes = mp_image_plane_w(&p->layout, n) *
+ mapper->tex[n]->params.format->pixel_size,
+ .Height = mp_image_plane_h(&p->layout, n),
+ };
+
+ ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, 0));
+ if (ret < 0)
+ goto error;
+
+ if (p_owner->ext_signal) {
+ if (!p_owner->ext_signal(mapper, n))
+ goto error;
+ }
+ }
+
+ error:
+ eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ if (eret < 0)
+ return eret;
+
+ return ret;
+}
+
+const struct ra_hwdec_driver ra_hwdec_cuda = {
+ .name = "cuda-nvdec",
+ .imgfmts = {IMGFMT_CUDA, 0},
+ .priv_size = sizeof(struct cuda_hw_priv),
+ .init = cuda_init,
+ .uninit = cuda_uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct cuda_mapper_priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ .unmap = mapper_unmap,
+ },
+};