author | Niklas Haas <git@haasn.xyz> | 2017-08-16 22:13:51 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.xyz> | 2017-08-18 00:34:34 +0200 |
commit | 46d86da6300ebcd2134996c76b9238fcf8e0fb6e (patch) | |
tree | a9ac0b463aba5ea47112b5e93ec6570f7b37fbe2 /video/out/opengl/ra_gl.c | |
parent | 9ca5a2a5d839476d8a597fcc124cce41279928bc (diff) | |
download | mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.bz2 mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.xz |
vo_opengl: refactor RA texture and buffer updates
- tex_upload's args are moved to a struct (see the sketch after this list)
- the ability to directly upload texture data without going through a
buffer is made explicit
- the concept of buffer updates and buffer polling is made more explicit
and generalized to buf_update as well (not just mapped buffers)
- the ability to call tex_upload/buf_update on a tex/buf is made
explicit during tex/buf creation
- uploading from buffers now uses an explicit offset instead of
implicitly comparing *src against buf->data, because not all buffers
may actually be persistently mapped
- the initial_data = immutable requirement is dropped. (May be re-added
later for D3D11 if that ever becomes a thing)
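To make the points above concrete, here is a rough sketch of what the new upload entry point looks like from the caller's side. The exact definition of ra_tex_upload_params lives in ra.h and is not part of this diff, so the field list below is pieced together from the params-> accesses visible in the patch; treat the types, ordering, and the helper function as assumptions for illustration, not as the actual header.

```c
// Approximate shape of the new upload parameters (inferred, not verbatim):
struct ra_tex_upload_params {
    struct ra_tex *tex;    // texture to upload to; must be host_mutable
    bool invalidate;       // hint that the old texture contents can be discarded
    const void *src;       // source memory for a direct upload (or NULL)
    struct ra_buf *buf;    // source buffer for a PBO-style upload (or NULL)
    size_t buf_offset;     // explicit offset into buf, replacing *src - buf->data
    ptrdiff_t stride;      // source stride in bytes (2D only)
    struct mp_rect *rc;    // region to upload (2D only); NULL means the full texture
};

// Illustrative helper (not part of the patch): a direct, bufferless upload of a
// whole 2D texture, relying on the new RA_CAP_DIRECT_UPLOAD capability.
static void upload_whole_texture(struct ra *ra, struct ra_tex *tex,
                                 const void *pixels, ptrdiff_t stride)
{
    struct ra_tex_upload_params params = {
        .tex = tex,
        .src = pixels,
        .stride = stride,
        .invalidate = true,
    };
    ra->fns->tex_upload(ra, &params);
}
```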
This change helps the vulkan abstraction immensely and also helps move
common code (like the PBO pooling) out of ra_gl.c and into
opengl/utils.c.
This also has the technical side-benefit / side-constraint of routing OSD
texture uploads through PBOs too, which actually seems to help
performance on machines where --opengl-pbo is faster than the naive code
path. Because of this, I decided to hook the OSD code up to the
--opengl-pbo option as well.
One drawback of this refactor is that the GL_STREAM_COPY hack for
texture uploads got lost. I'm happy with it going away anyway: DR
(direct rendering) almost fully supersedes it, and it was never the
"right thing" to begin with, just an nvidia-only hack that made uploads
work somewhat better on NUMA systems with discrete GPUs.
Another change is that, due to the way fencing works with ra_buf (we get
one fence per ra_buf per upload), we have to use multiple ra_bufs instead
of offsets into a shared buffer. For OpenGL this is probably better
anyway. It's possible that in the future we could support independent
"buffer slices" (each with its own fence/sync object), but that would be
an optimization more than anything. I also think the underlying problem
(memory locality) could be addressed differently, by making the ra_vk
memory allocator smart enough to chunk allocations together under the
hood.
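As an illustration of what the "multiple ra_bufs" arrangement looks like for a caller, here is a hypothetical sketch of a small ring of upload buffers: each upload takes the next host-mapped buffer, but only if its fence has already signalled (buf_poll). The names upload_pool, pool_upload and NUM_PBOS are invented for this example; the actual pooling code referred to above lives in opengl/utils.c and may differ in detail.

```c
#include <string.h>  // memcpy; the ra/ra_buf/ra_tex types come from video/out/opengl/ra.h

#define NUM_PBOS 3

struct upload_pool {
    struct ra_buf *bufs[NUM_PBOS]; // host-mapped buffers created via buf_create
    int index;                     // next buffer to try
};

// Returns false if the next buffer is still in flight, so the caller can fall
// back to a direct upload instead of stalling.
static bool pool_upload(struct ra *ra, struct upload_pool *pool,
                        struct ra_tex *tex, const void *src, size_t size,
                        ptrdiff_t stride)
{
    struct ra_buf *buf = pool->bufs[pool->index];
    if (!ra->fns->buf_poll(ra, buf))
        return false; // GL has not yet signalled the fence from the last upload

    // buf->data is the persistently mapped memory of a host_mapped ra_buf.
    memcpy(buf->data, src, size);

    struct ra_tex_upload_params params = {
        .tex = tex,
        .buf = buf,
        .buf_offset = 0, // explicit offset instead of pointer arithmetic on src
        .stride = stride,
    };
    ra->fns->tex_upload(ra, &params);

    pool->index = (pool->index + 1) % NUM_PBOS;
    return true;
}
```

Because each ra_buf carries its own fence, every ring entry can be polled and reused independently, which is exactly why separate ra_bufs are used instead of offsets into one large shared buffer.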
Diffstat (limited to 'video/out/opengl/ra_gl.c')
-rw-r--r-- | video/out/opengl/ra_gl.c | 100 |
1 file changed, 58 insertions, 42 deletions
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index 6d27d5a285..36109753aa 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -23,11 +23,11 @@ struct ra_tex_gl {
     GLint internal_format;
     GLenum format;
     GLenum type;
-    struct gl_pbo_upload pbo;
 };

 // For ra_buf.priv
 struct ra_buf_gl {
+    GLenum target;
     GLuint buffer;
     GLsync fence;
 };
@@ -90,7 +90,7 @@ static int ra_init_gl(struct ra *ra, GL *gl)

     ra_gl_set_debug(ra, true);
     ra->fns = &ra_fns_gl;
-    ra->caps = 0;
+    ra->caps = RA_CAP_DIRECT_UPLOAD;
     if (gl->mpgl_caps & MPGL_CAP_1D_TEX)
         ra->caps |= RA_CAP_TEX_1D;
     if (gl->mpgl_caps & MPGL_CAP_3D_TEX)
@@ -99,8 +99,6 @@ static int ra_init_gl(struct ra *ra, GL *gl)
         ra->caps |= RA_CAP_BLIT;
     if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER)
         ra->caps |= RA_CAP_COMPUTE;
-    if (gl->MapBufferRange)
-        ra->caps |= RA_CAP_PBO;
     if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY)
         ra->caps |= RA_CAP_NESTED_ARRAY;
     if (gl->mpgl_caps & MPGL_CAP_SSBO)
@@ -226,7 +224,6 @@ static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex)
         gl->DeleteTextures(1, &tex_gl->texture);
     }

-    gl_pbo_upload_uninit(&tex_gl->pbo);
     talloc_free(tex_gl);
     talloc_free(tex);
 }
@@ -427,40 +424,42 @@ bool ra_is_gl(struct ra *ra)
     return ra->fns == &ra_fns_gl;
 }

-static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,
-                          const void *src, ptrdiff_t stride,
-                          struct mp_rect *rc, uint64_t flags,
-                          struct ra_buf *buf)
+static void gl_tex_upload(struct ra *ra,
+                          const struct ra_tex_upload_params *params)
 {
     GL *gl = ra_gl_get(ra);
+    struct ra_tex *tex = params->tex;
+    struct ra_buf *buf = params->buf;
     struct ra_tex_gl *tex_gl = tex->priv;
-    struct ra_buf_gl *buf_gl = NULL;
-    struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
+    struct ra_buf_gl *buf_gl = buf ? buf->priv : NULL;
+    assert(tex->params.host_mutable);
+    assert(!params->buf || !params->src);

+    const void *src = params->src;
     if (buf) {
-        buf_gl = buf->priv;
         gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer);
-        src = (void *)((uintptr_t)src - (uintptr_t)buf->data);
+        src = (void *)params->buf_offset;
     }

     gl->BindTexture(tex_gl->target, tex_gl->texture);
+    if (params->invalidate && gl->InvalidateTexImage)
+        gl->InvalidateTexImage(tex_gl->texture, 0);

     switch (tex->params.dimensions) {
     case 1:
-        assert(!rc);
         gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format,
                        tex->params.w, 0, tex_gl->format, tex_gl->type, src);
         break;
-    case 2:
-        if (!rc)
-            rc = &full;
-        gl_pbo_upload_tex(&tex_gl->pbo, gl, ra->use_pbo && !buf,
-                          tex_gl->target, tex_gl->format, tex_gl->type,
-                          tex->params.w, tex->params.h, src, stride,
-                          rc->x0, rc->y0, rc->x1 - rc->x0, rc->y1 - rc->y0);
+    case 2: {
+        struct mp_rect rc = {0, 0, tex->params.w, tex->params.h};
+        if (params->rc)
+            rc = *params->rc;
+        gl_upload_tex(gl, tex_gl->target, tex_gl->format, tex_gl->type,
+                      src, params->stride, rc.x0, rc.y0, rc.x1 - rc.x0,
+                      rc.y1 - rc.y0);
         break;
+    }
     case 3:
-        assert(!rc);
         gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
         gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w,
                        tex->params.h, tex->params.d, 0, tex_gl->format,
@@ -473,11 +472,13 @@ static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,

     if (buf) {
         gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-        // Make sure the PBO is not reused until GL is done with it. If a
-        // previous operation is pending, "update" it by creating a new
-        // fence that will cover the previous operation as well.
-        gl->DeleteSync(buf_gl->fence);
-        buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        if (buf->params.host_mapped) {
+            // Make sure the PBO is not reused until GL is done with it. If a
+            // previous operation is pending, "update" it by creating a new
+            // fence that will cover the previous operation as well.
+            gl->DeleteSync(buf_gl->fence);
+            buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        }
     }
 }

@@ -491,10 +492,9 @@ static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf)
     gl->DeleteSync(buf_gl->fence);

     if (buf->data) {
-        // The target type used here doesn't matter at all to OpenGL
-        gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer);
-        gl->UnmapBuffer(GL_ARRAY_BUFFER);
-        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+        gl->BindBuffer(buf_gl->target, buf_gl->buffer);
+        gl->UnmapBuffer(buf_gl->target);
+        gl->BindBuffer(buf_gl->target, 0);
     }

     gl->DeleteBuffers(1, &buf_gl->buffer);
@@ -517,14 +517,13 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
     struct ra_buf_gl *buf_gl = buf->priv = talloc_zero(NULL, struct ra_buf_gl);
     gl->GenBuffers(1, &buf_gl->buffer);

-    GLenum target;
     switch (params->type) {
-    case RA_BUF_TYPE_TEX_UPLOAD: target = GL_PIXEL_UNPACK_BUFFER; break;
-    case RA_BUF_TYPE_SHADER_STORAGE: target = GL_SHADER_STORAGE_BUFFER; break;
+    case RA_BUF_TYPE_TEX_UPLOAD: buf_gl->target = GL_PIXEL_UNPACK_BUFFER; break;
+    case RA_BUF_TYPE_SHADER_STORAGE: buf_gl->target = GL_SHADER_STORAGE_BUFFER; break;
     default: abort();
     };

-    gl->BindBuffer(target, buf_gl->buffer);
+    gl->BindBuffer(buf_gl->target, buf_gl->buffer);

     if (params->host_mapped) {
         unsigned flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT |
@@ -534,8 +533,9 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
         if (params->type == RA_BUF_TYPE_TEX_UPLOAD)
             storflags |= GL_CLIENT_STORAGE_BIT;

-        gl->BufferStorage(target, params->size, params->initial_data, storflags);
-        buf->data = gl->MapBufferRange(target, 0, params->size, flags);
+        gl->BufferStorage(buf_gl->target, params->size, params->initial_data,
+                          storflags);
+        buf->data = gl->MapBufferRange(buf_gl->target, 0, params->size, flags);
         if (!buf->data) {
             gl_check_error(gl, ra->log, "mapping buffer");
             gl_buf_destroy(ra, buf);
@@ -549,16 +549,31 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
         default: abort();
         }

-        gl->BufferData(target, params->size, params->initial_data, hint);
+        gl->BufferData(buf_gl->target, params->size, params->initial_data, hint);
     }

-    gl->BindBuffer(target, 0);
+    gl->BindBuffer(buf_gl->target, 0);
     return buf;
 }

-static bool gl_poll_mapped_buffer(struct ra *ra, struct ra_buf *buf)
+static void gl_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+                          const void *data, size_t size)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_buf_gl *buf_gl = buf->priv;
+    assert(buf->params.host_mutable);
+
+    gl->BindBuffer(buf_gl->target, buf_gl->buffer);
+    gl->BufferSubData(buf_gl->target, offset, size, data);
+    gl->BindBuffer(buf_gl->target, 0);
+}
+
+static bool gl_buf_poll(struct ra *ra, struct ra_buf *buf)
 {
-    assert(buf->data);
+    // Non-persistently mapped buffers are always implicitly reusable in OpenGL,
+    // the implementation will create more buffers under the hood if needed.
+    if (!buf->data)
+        return true;

     GL *gl = ra_gl_get(ra);
     struct ra_buf_gl *buf_gl = buf->priv;
@@ -1080,7 +1095,8 @@ static struct ra_fns ra_fns_gl = {
     .tex_upload = gl_tex_upload,
     .buf_create = gl_buf_create,
     .buf_destroy = gl_buf_destroy,
-    .poll_mapped_buffer = gl_poll_mapped_buffer,
+    .buf_update = gl_buf_update,
+    .buf_poll = gl_buf_poll,
     .clear = gl_clear,
     .blit = gl_blit,
     .renderpass_create = gl_renderpass_create,