From 6a5e760c4d7f4a487739832e815331388ad9faa1 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Thu, 23 Jul 2020 12:50:53 +0200 Subject: vo_gpu: allow DR path even when DR is unavailable Even when using `vd-lavc-dr`, some circumstances could cause the DR code to get disabled, for example the existence of extra CPU filters that cause the resulting avbuffers to no longer correspond to `ra_bufs`. In this case, we can still enable DR, for at least the libplacebo-based contexts, by directly importing the host pointer into a dedicated, temporary ra_buf which we then free explicitly as soon as we no longer need it. I made the PL_API_VER depend on >=89 even though this functionality was introduced in API ver 85 because the initial implementation required page-aligned host pointers, whereas newer libplacebo does not. It might be worth introducing an extra option to explicitly disable this, for debugging. Ideally it'd be cool if it could also depend on the value of `vd-lavc-dr`, but I don't know how to propagate that option properly. --- video/out/gpu/ra.h | 3 +++ video/out/gpu/video.c | 31 +++++++++++++++++++++++++++---- video/out/placebo/ra_pl.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h index 4067b52425..29dcd20760 100644 --- a/video/out/gpu/ra.h +++ b/video/out/gpu/ra.h @@ -435,6 +435,9 @@ struct ra_fns { // NULL then all buffers are always usable. bool (*buf_poll)(struct ra *ra, struct ra_buf *buf); + // Create a `ra_buf` mapping an existing host pointer. May fail. Optional. + struct ra_buf *(*buf_map_ptr)(struct ra *ra, const void *ptr, size_t size); + // Returns the layout requirements of a uniform buffer element. Optional, // but must be implemented if RA_CAP_BUF_RO is supported. struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 851289e281..e70b5d7a73 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -151,6 +151,7 @@ struct pass_info { struct dr_buffer { struct ra_buf *buf; + bool dedicated; // this buffer was purpose-allocated to map one mpi // The mpi reference will keep the data from being recycled (or from other // references gaining write access) while the GPU is accessing the buffer. struct mp_image *mpi; @@ -967,16 +968,34 @@ static void init_video(struct gl_video *p) gl_video_setup_hooks(p); } -static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) +static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, + struct mp_image *mpi, int n) { - for (int i = 0; i < p->num_dr_buffers; i++) { - struct dr_buffer *buffer = &p->dr_buffers[i]; + uint8_t *ptr = mpi->planes[n]; + + for (int i = 0; i < p->num_dr_buffers; i++) { + struct dr_buffer *buffer = &p->dr_buffers[i]; uint8_t *bufptr = buffer->buf->data; size_t size = buffer->buf->params.size; if (ptr >= bufptr && ptr < bufptr + size) return buffer; } + if (p->ra->fns->buf_map_ptr) { + size_t size = mpi->stride[n] * (mpi->h >> mpi->fmt.ys[n]); + struct ra_buf *buf = p->ra->fns->buf_map_ptr(p->ra, ptr, size); + if (!buf) + return NULL; + + MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); + p->dr_buffers[p->num_dr_buffers] = (struct dr_buffer){ + .buf = buf, + .dedicated = true, + }; + + return &p->dr_buffers[p->num_dr_buffers++]; + } + return NULL; } @@ -1000,6 +1019,10 @@ again:; struct mp_image *ref = buffer->mpi; buffer->mpi = NULL; talloc_free(ref); + if (buffer->dedicated) { + ra_buf_free(p->ra, &buffer->buf); + MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); + } goto again; } } @@ -3593,7 +3616,7 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t params.stride = -params.stride; } - struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); + struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi, n); if (mapped) { params.buf = mapped->buf; params.buf_offset = (uintptr_t)params.src - diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c index f8df590511..17d83566a5 100644 --- a/video/out/placebo/ra_pl.c +++ b/video/out/placebo/ra_pl.c @@ -374,6 +374,47 @@ static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf) return !pl_buf_poll(get_gpu(ra), buf->priv, 0); } +static struct ra_buf *buf_map_ptr_pl(struct ra *ra, const void *ptr, + size_t size) +{ +#if PL_API_VER >= 89 + + const struct pl_gpu *gpu = get_gpu(ra); + if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR)) + return NULL; + + const struct pl_buf *plbuf; + plbuf = pl_buf_create(gpu, &(struct pl_buf_params) { + .size = size, + .host_mapped = true, + .import_handle = PL_HANDLE_HOST_PTR, + .shared_mem = { + .handle.ptr = (void *) ptr, + .size = size, + }, + }); + + if (!plbuf) + return NULL; + + struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf); + *rabuf = (struct ra_buf) { + .params = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .size = plbuf->params.size, + .host_mapped = true, + }, + .data = plbuf->data, + .priv = (void *) plbuf, + }; + + return rabuf; + +#else // PL_API_VER < 89 + return NULL; +#endif +} + static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4], struct mp_rect *scissor) { @@ -751,6 +792,7 @@ static struct ra_fns ra_fns_pl = { .buf_destroy = buf_destroy_pl, .buf_update = buf_update_pl, .buf_poll = buf_poll_pl, + .buf_map_ptr = buf_map_ptr_pl, .clear = clear_pl, .blit = blit_pl, .uniform_layout = uniform_layout_pl, -- cgit v1.2.3