diff options
author | Niklas Haas <git@haasn.xyz> | 2017-08-16 22:13:51 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.xyz> | 2017-08-18 00:34:34 +0200 |
commit | 46d86da6300ebcd2134996c76b9238fcf8e0fb6e (patch) | |
tree | a9ac0b463aba5ea47112b5e93ec6570f7b37fbe2 /video/out/opengl/video.c | |
parent | 9ca5a2a5d839476d8a597fcc124cce41279928bc (diff) | |
download | mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.bz2 mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.xz |
vo_opengl: refactor RA texture and buffer updates
- tex_upload's args are moved to a struct
- the ability to directly upload texture data without going through a
buffer is made explicit
- the concept of buffer updates and buffer polling is made more explicit
and generalized to buf_update as well (not just mapped buffers)
- the ability to call tex_upload/buf_update on a tex/buf is made
explicit during tex/buf creation
- uploading from buffers now uses an explicit offset instead of
implicitly comparing *src against buf->data, because not all buffers
may actually be persistently mapped
- the initial_data = immutable requirement is dropped. (May be re-added
later for D3D11 if that ever becomes a thing)
This change helps the vulkan abstraction immensely and also helps move
common code (like the PBO pooling) out of ra_gl and into
opengl/utils.c.
This also technically has the side-benefit / side-constraint of using
PBOs for OSD texture uploads as well, which actually seems to help
performance on machines where --opengl-pbo is faster than the naive code
path. Because of this, I decided to hook up the OSD code to the
opengl-pbo option as well.
One drawback of this refactor is that the GL_STREAM_COPY hack for
texture uploads "got lost", but I think I'm happy with that going away
anyway since DR almost fully deprecates it, and it's not the "right
thing" anyway, but rather an nvidia-only hack to make this stuff work
somewhat better on NUMA systems with discrete GPUs.
Another change is that, due to the way fencing works with ra_buf (we get
one fence per ra_buf per upload), we have to use multiple ra_bufs instead
of offsets into a shared buffer. But for OpenGL this is probably better
anyway. It's possible that in the future, we could support having
independent “buffer slices” (each with their own fence/sync object), but
this would be an optimization more than anything. I also think that we
could address the underlying problem (memory closeness) differently by
making the ra_vk memory allocator smart enough to chunk together
allocations under the hood.
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- | video/out/opengl/video.c | 42 |
1 files changed, 26 insertions, 16 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index a9c64c338f..e8ff23f2b4 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -84,6 +84,7 @@ static const struct ra_renderpass_input vertex_vao[] = { struct texplane { struct ra_tex *tex; + struct tex_upload pbo; int w, h; bool flipped; }; @@ -493,7 +494,7 @@ static void reinit_osd(struct gl_video *p) mpgl_osd_destroy(p->osd); p->osd = NULL; if (p->osd_state) - p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state); + p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state, p->opts.pbo); } static void uninit_rendering(struct gl_video *p) @@ -882,6 +883,7 @@ static void init_video(struct gl_video *p) .render_src = true, .src_linear = format->linear_filter, .non_normalized = p->opts.use_rectangle, + .host_mutable = true, }; MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, @@ -935,7 +937,7 @@ again:; if (!buffer->mpi) continue; - bool res = p->ra->fns->poll_mapped_buffer(p->ra, buffer->buf); + bool res = p->ra->fns->buf_poll(p->ra, buffer->buf); if (res || force) { // Unreferencing the image could cause gl_video_dr_free_buffer() // to be called by the talloc destructor (if it was the last @@ -984,8 +986,8 @@ static void uninit_video(struct gl_video *p) for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; - ra_tex_free(p->ra, &plane->tex); + tex_upload_uninit(p->ra, &plane->pbo); } *vimg = (struct video_image){0}; @@ -3269,19 +3271,33 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t plane->flipped = mpi->stride[0] < 0; - struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); - - p->ra->fns->tex_upload(p->ra, plane->tex, mpi->planes[n], - mpi->stride[n], NULL, 0, - mapped ? 
mapped->buf : NULL); + struct ra_tex_upload_params params = { + .tex = plane->tex, + .src = mpi->planes[n], + .invalidate = true, + .stride = mpi->stride[n], + }; - if (mapped && !mapped->mpi) - mapped->mpi = mp_image_new_ref(mpi); + struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); + if (mapped) { + params.buf = mapped->buf; + params.buf_offset = (uintptr_t)params.src - + (uintptr_t)mapped->buf->data; + params.src = NULL; + } if (p->using_dr_path != !!mapped) { p->using_dr_path = !!mapped; MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no"); } + + if (!tex_upload(p->ra, &plane->pbo, p->opts.pbo, &params)) { + timer_pool_stop(p->upload_timer); + goto error; + } + + if (mapped && !mapped->mpi) + mapped->mpi = mp_image_new_ref(mpi); } timer_pool_stop(p->upload_timer); const char *mode = p->using_dr_path ? "DR" : p->opts.pbo ? "PBO" : "naive"; @@ -3367,11 +3383,6 @@ static void check_gl_features(struct gl_video *p) } } - if (!(ra->caps & RA_CAP_PBO) && p->opts.pbo) { - p->opts.pbo = 0; - MP_WARN(p, "Disabling PBOs (GL2.1/GLES2 unsupported).\n"); - } - p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; bool voluntarily_dumb = check_dumb_mode(p); if (p->forced_dumb_mode || voluntarily_dumb) { @@ -3628,7 +3639,6 @@ static void reinit_from_options(struct gl_video *p) check_gl_features(p); uninit_rendering(p); gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir); - p->ra->use_pbo = p->opts.pbo; gl_video_setup_hooks(p); reinit_osd(p); |