diff options
Diffstat (limited to 'video/out/opengl/hwdec_cuda.c')
-rw-r--r-- | video/out/opengl/hwdec_cuda.c | 158 |
1 files changed, 66 insertions, 92 deletions
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index 4edad96c7a..40c4e19041 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -28,16 +28,21 @@ */ #include "cuda_dynamic.h" -#include "video/mp_image_pool.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_cuda.h> + +#include "formats.h" #include "hwdec.h" #include "video.h" struct priv { struct mp_hwdec_ctx hwctx; struct mp_image layout; - GLuint gl_textures[2]; - CUgraphicsResource cu_res[2]; - CUarray cu_array[2]; + GLuint gl_textures[4]; + CUgraphicsResource cu_res[4]; + CUarray cu_array[4]; + int sample_count[4]; int sample_width; CUcontext cuda_ctx; @@ -66,79 +71,14 @@ static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func) #define CHECK_CU(x) check_cu(hw, (x), #x) -static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx, - struct mp_image *hw_image, - struct mp_image_pool *swpool) -{ - CUcontext cuda_ctx = ctx->ctx; - CUcontext dummy; - CUresult err, eerr; - - if (hw_image->imgfmt != IMGFMT_CUDA) - return NULL; - - int sample_width; - switch (hw_image->params.hw_subfmt) { - case IMGFMT_NV12: - sample_width = 1; - break; - case IMGFMT_P010: - case IMGFMT_P016: - sample_width = 2; - break; - default: - return NULL; - } - - struct mp_image *out = mp_image_pool_get(swpool, - hw_image->params.hw_subfmt, - hw_image->w, hw_image->h); - if (!out) - return NULL; - - err = cuCtxPushCurrent(cuda_ctx); - if (err != CUDA_SUCCESS) - goto error; - - mp_image_set_size(out, hw_image->w, hw_image->h); - mp_image_copy_attributes(out, hw_image); - - for (int n = 0; n < 2; n++) { - CUDA_MEMCPY2D cpy = { - .srcMemoryType = CU_MEMORYTYPE_DEVICE, - .dstMemoryType = CU_MEMORYTYPE_HOST, - .srcDevice = (CUdeviceptr)hw_image->planes[n], - .dstHost = out->planes[n], - .srcPitch = hw_image->stride[n], - .dstPitch = out->stride[n], - .WidthInBytes = mp_image_plane_w(out, n) * - (n + 1) * sample_width, - .Height = mp_image_plane_h(out, n), - }; - - err = cuMemcpy2D(&cpy); - if (err != CUDA_SUCCESS) { - goto error; - } - } - - error: - eerr = cuCtxPopCurrent(&dummy); - if (eerr != CUDA_SUCCESS || err != CUDA_SUCCESS) { - talloc_free(out); - return NULL; - } - - return out; -} - static int cuda_create(struct gl_hwdec *hw) { CUdevice device; CUcontext cuda_ctx = NULL; + AVBufferRef *hw_device_ctx = NULL; CUcontext dummy; unsigned int device_count; - int ret = 0, eret = 0; + int ret = 0; if (hw->gl->version < 210 && hw->gl->es < 300) { MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n"); @@ -169,20 +109,39 @@ static int cuda_create(struct gl_hwdec *hw) p->cuda_ctx = cuda_ctx; + hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if (!hw_device_ctx) + goto error; + + AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data; + + AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; + device_hwctx->cuda_ctx = cuda_ctx; + + ret = av_hwdevice_ctx_init(hw_device_ctx); + if (ret < 0) { + MP_ERR(hw, "av_hwdevice_ctx_init failed\n"); + goto error; + } + + ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + if (ret < 0) + goto error; + p->hwctx = (struct mp_hwdec_ctx) { .type = HWDEC_CUDA, .ctx = cuda_ctx, - .download_image = cuda_download_image, + .av_device_ref = hw_device_ctx, }; p->hwctx.driver_name = hw->driver->name; hwdec_devices_add(hw->devs, &p->hwctx); + return 0; error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); - if (eret < 0) - return eret; + av_buffer_unref(&hw_device_ctx); + CHECK_CU(cuCtxPopCurrent(&dummy)); - return ret; + return -1; } static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) @@ -198,21 +157,25 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) mp_image_set_params(&p->layout, params); - GLint luma_format, chroma_format; - GLenum type; + for (int n = 0; n < 4; n++) + p->sample_count[n] = 0; + switch (params->imgfmt) { case IMGFMT_NV12: - luma_format = GL_R8; - chroma_format = GL_RG8; - type = GL_UNSIGNED_BYTE; p->sample_width = 1; + p->sample_count[0] = 1; + p->sample_count[1] = 2; break; case IMGFMT_P010: case IMGFMT_P016: - luma_format = GL_R16; - chroma_format = GL_RG16; - type = GL_UNSIGNED_SHORT; p->sample_width = 2; + p->sample_count[0] = 1; + p->sample_count[1] = 2; + break; + case IMGFMT_420P: + p->sample_width = 1; + for (int n = 0; n < 3; n++) + p->sample_count[n] = 1; break; default: MP_ERR(hw, "Unsupported format: %s\n", mp_imgfmt_to_name(params->imgfmt)); @@ -223,18 +186,24 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) if (ret < 0) return ret; - gl->GenTextures(2, p->gl_textures); - for (int n = 0; n < 2; n++) { + gl->GenTextures(4, p->gl_textures); + for (int n = 0; n < 4; n++) { + if (!p->sample_count[n]) + break; + + const struct gl_format *fmt = + gl_find_unorm_format(gl, p->sample_width, p->sample_count[n]); + gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); GLenum filter = GL_NEAREST; gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? luma_format : chroma_format, + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, mp_image_plane_w(&p->layout, n), mp_image_plane_h(&p->layout, n), - 0, n == 0 ? GL_RED : GL_RG, type, NULL); + 0, fmt->format, fmt->type, NULL); gl->BindTexture(GL_TEXTURE_2D, 0); ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], p->gl_textures[n], @@ -273,17 +242,19 @@ static void destroy(struct gl_hwdec *hw) // Don't bail if any CUDA calls fail. This is all best effort. CHECK_CU(cuCtxPushCurrent(p->cuda_ctx)); - for (int n = 0; n < 2; n++) { + for (int n = 0; n < 4; n++) { if (p->cu_res[n] > 0) CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n])); + p->cu_res[n] = 0; } CHECK_CU(cuCtxPopCurrent(&dummy)); CHECK_CU(cuCtxDestroy(p->cuda_ctx)); - gl->DeleteTextures(2, p->gl_textures); + gl->DeleteTextures(4, p->gl_textures); hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); } static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, @@ -299,7 +270,10 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, *out_frame = (struct gl_hwdec_frame) { 0, }; - for (int n = 0; n < 2; n++) { + for (int n = 0; n < 4; n++) { + if (!p->sample_count[n]) + break; + // widthInBytes must account for the chroma plane // elements being two samples wide. CUDA_MEMCPY2D cpy = { @@ -310,7 +284,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, .srcY = 0, .dstArray = p->cu_array[n], .WidthInBytes = mp_image_plane_w(&p->layout, n) * - (n + 1) * p->sample_width, + p->sample_count[n] * p->sample_width, .Height = mp_image_plane_h(&p->layout, n), }; ret = CHECK_CU(cuMemcpy2D(&cpy)); |