summary | refs | log | tree | commit | diff | stats
path: root/video/out/opengl/hwdec_cuda.c
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2017-08-10 17:48:33 +0200
committer: wm4 <wm4@nowhere> 2017-08-10 21:24:31 +0200
commit: c6fafbffaca16959dfa2b4bf1eb97861ad66b5ef (patch)
tree: decef889171128c27f3a8cb2fcfb552f1b7297be /video/out/opengl/hwdec_cuda.c
parent: b2fb3f1340ed7ceb9b3fc8ba4ddec107e3a41a13 (diff)
download: mpv-c6fafbffaca16959dfa2b4bf1eb97861ad66b5ef.tar.bz2
download: mpv-c6fafbffaca16959dfa2b4bf1eb97861ad66b5ef.tar.xz
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate, rather intrusive things: 1. Make the hwdec context (which does initialization, provides the device to the decoder, and other basic state) and frame mapping (getting textures from a mp_image) separate. This is more flexible, and you could map multiple images at once. It will help with removing some hwdec special-casing from video.c. 2. Switch all hwdec API use to ra. Of course all code is still GL specific, but in theory it would be possible to support other backends. The most important change is that the hwdec interop returns ra objects, instead of anything GL specific. This removes the last dependency on GL-specific header files from video.c. I'm mixing these separate changes because both require essentially rewriting all the glue code, so it is better to do them at once. For the same reason, this change isn't done incrementally. hwdec_ios.m is untested, since I can't test it. Apart from superficial mistakes, this also requires dealing with Apple's texture format fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED and GL_RG. We also need to report the correct format via ra_tex to the renderer, which is done by find_la_variant(). It's unknown whether this works correctly. hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my RPI out of a dusty pile of devices and cables, so, later.)
Diffstat (limited to 'video/out/opengl/hwdec_cuda.c')
-rw-r--r-- video/out/opengl/hwdec_cuda.c 168
1 files changed, 93 insertions, 75 deletions
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index fd432ee7f8..d40bafee24 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -35,21 +35,24 @@
#include "formats.h"
#include "hwdec.h"
#include "options/m_config.h"
+#include "ra_gl.h"
#include "video.h"
-struct priv {
+struct priv_owner {
struct mp_hwdec_ctx hwctx;
+ CUcontext display_ctx;
+ CUcontext decode_ctx;
+};
+
+struct priv {
struct mp_image layout;
- GLuint gl_textures[4];
CUgraphicsResource cu_res[4];
CUarray cu_array[4];
- int plane_bytes[4];
CUcontext display_ctx;
- CUcontext decode_ctx;
};
-static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
+static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func)
{
const char *err_name;
const char *err_string;
@@ -72,22 +75,24 @@ static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
#define CHECK_CU(x) check_cu(hw, (x), #x)
-static int cuda_create(struct gl_hwdec *hw)
+static int cuda_init(struct ra_hwdec *hw)
{
CUdevice display_dev;
AVBufferRef *hw_device_ctx = NULL;
CUcontext dummy;
unsigned int device_count;
int ret = 0;
+ struct priv_owner *p = hw->priv;
- if (hw->gl->version < 210 && hw->gl->es < 300) {
+ if (!ra_is_gl(hw->ra))
+ return -1;
+
+ GL *gl = ra_gl_get(hw->ra);
+ if (gl->version < 210 && gl->es < 300) {
MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
return -1;
}
- struct priv *p = talloc_zero(hw, struct priv);
- hw->priv = p;
-
bool loaded = cuda_load();
if (!loaded) {
MP_VERBOSE(hw, "Failed to load CUDA symbols\n");
@@ -171,22 +176,43 @@ static int cuda_create(struct gl_hwdec *hw)
return -1;
}
-static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
+static void cuda_uninit(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+
+ if (p->hwctx.ctx)
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ av_buffer_unref(&p->hwctx.av_device_ref);
+
+ if (p->decode_ctx && p->decode_ctx != p->display_ctx)
+ CHECK_CU(cuCtxDestroy(p->decode_ctx));
+
+ if (p->display_ctx)
+ CHECK_CU(cuCtxDestroy(p->display_ctx));
+}
+
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
{
- struct priv *p = hw->priv;
- GL *gl = hw->gl;
+ struct priv_owner *p_owner = mapper->owner->priv;
+ struct priv *p = mapper->priv;
CUcontext dummy;
int ret = 0, eret = 0;
- assert(params->imgfmt == hw->driver->imgfmt);
- params->imgfmt = params->hw_subfmt;
- params->hw_subfmt = 0;
+ p->display_ctx = p_owner->display_ctx;
- mp_image_set_params(&p->layout, params);
+ int imgfmt = mapper->src_params.hw_subfmt;
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = imgfmt;
+ mapper->dst_params.hw_subfmt = 0;
- struct gl_imgfmt_desc desc;
- if (!gl_get_imgfmt_desc(gl, params->imgfmt, &desc)) {
- MP_ERR(hw, "Unsupported format: %s\n", mp_imgfmt_to_name(params->imgfmt));
+ mp_image_set_params(&p->layout, &mapper->dst_params);
+
+ struct ra_imgfmt_desc desc;
+ if (!ra_get_imgfmt_desc(mapper->ra, imgfmt, &desc)) {
+ MP_ERR(mapper, "Unsupported format: %s\n", mp_imgfmt_to_name(imgfmt));
return -1;
}
@@ -194,26 +220,30 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
if (ret < 0)
return ret;
- gl->GenTextures(4, p->gl_textures);
for (int n = 0; n < desc.num_planes; n++) {
- const struct gl_format *fmt = desc.planes[n];
-
- p->plane_bytes[n] = gl_bytes_per_pixel(fmt->format, fmt->type);
-
- gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]);
- GLenum filter = GL_LINEAR;
- gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
- gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
- gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format,
- mp_image_plane_w(&p->layout, n),
- mp_image_plane_h(&p->layout, n),
- 0, fmt->format, fmt->type, NULL);
- gl->BindTexture(GL_TEXTURE_2D, 0);
-
- ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], p->gl_textures[n],
- GL_TEXTURE_2D,
+ const struct ra_format *format = desc.planes[n];
+
+ struct ra_tex_params params = {
+ .dimensions = 2,
+ .w = mp_image_plane_w(&p->layout, n),
+ .h = mp_image_plane_h(&p->layout, n),
+ .d = 1,
+ .format = format,
+ .render_src = true,
+ .src_linear = format->linear_filter,
+ };
+
+ mapper->tex[n] = ra_tex_create(mapper->ra, &params);
+ if (!mapper->tex[n]) {
+ ret = -1;
+ goto error;
+ }
+
+ GLuint texture;
+ GLenum target;
+ ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
+
+ ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target,
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
if (ret < 0)
goto error;
@@ -240,10 +270,9 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
return ret;
}
-static void destroy(struct gl_hwdec *hw)
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
{
- struct priv *p = hw->priv;
- GL *gl = hw->gl;
+ struct priv *p = mapper->priv;
CUcontext dummy;
// Don't bail if any CUDA calls fail. This is all best effort.
@@ -252,25 +281,18 @@ static void destroy(struct gl_hwdec *hw)
if (p->cu_res[n] > 0)
CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n]));
p->cu_res[n] = 0;
+ ra_tex_free(mapper->ra, &mapper->tex[n]);
}
CHECK_CU(cuCtxPopCurrent(&dummy));
+}
- if (p->decode_ctx != p->display_ctx) {
- CHECK_CU(cuCtxDestroy(p->decode_ctx));
- }
-
- CHECK_CU(cuCtxDestroy(p->display_ctx));
-
- gl->DeleteTextures(4, p->gl_textures);
-
- hwdec_devices_remove(hw->devs, &p->hwctx);
- av_buffer_unref(&p->hwctx.av_device_ref);
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
}
-static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
- struct gl_hwdec_frame *out_frame)
+static int mapper_map(struct ra_hwdec_mapper *mapper)
{
- struct priv *p = hw->priv;
+ struct priv *p = mapper->priv;
CUcontext dummy;
int ret = 0, eret = 0;
@@ -278,31 +300,21 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
if (ret < 0)
return ret;
- *out_frame = (struct gl_hwdec_frame) { 0, };
-
for (int n = 0; n < p->layout.num_planes; n++) {
- // widthInBytes must account for the chroma plane
- // elements being two samples wide.
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
- .srcDevice = (CUdeviceptr)hw_image->planes[n],
- .srcPitch = hw_image->stride[n],
+ .srcDevice = (CUdeviceptr)mapper->src->planes[n],
+ .srcPitch = mapper->src->stride[n],
.srcY = 0,
.dstArray = p->cu_array[n],
- .WidthInBytes = mp_image_plane_w(&p->layout, n) * p->plane_bytes[n],
+ .WidthInBytes = mp_image_plane_w(&p->layout, n) *
+ mapper->tex[n]->params.format->pixel_size,
.Height = mp_image_plane_h(&p->layout, n),
};
ret = CHECK_CU(cuMemcpy2D(&cpy));
if (ret < 0)
goto error;
-
- out_frame->planes[n] = (struct gl_hwdec_plane){
- .gl_texture = p->gl_textures[n],
- .gl_target = GL_TEXTURE_2D,
- .tex_w = mp_image_plane_w(&p->layout, n),
- .tex_h = mp_image_plane_h(&p->layout, n),
- };
}
@@ -314,12 +326,18 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
return ret;
}
-const struct gl_hwdec_driver gl_hwdec_cuda = {
+const struct ra_hwdec_driver ra_hwdec_cuda = {
.name = "cuda",
.api = HWDEC_CUDA,
- .imgfmt = IMGFMT_CUDA,
- .create = cuda_create,
- .reinit = reinit,
- .map_frame = map_frame,
- .destroy = destroy,
+ .imgfmts = {IMGFMT_CUDA, 0},
+ .priv_size = sizeof(struct priv_owner),
+ .init = cuda_init,
+ .uninit = cuda_uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ .unmap = mapper_unmap,
+ },
};