diff options
author | Niklas Haas <git@haasn.xyz> | 2017-07-15 22:11:48 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.xyz> | 2017-07-15 22:11:48 +0200 |
commit | b93bcce5df343de20096deb6d37e971bfa9a3072 (patch) | |
tree | 2dc9ab0730371dc4366751742f3228290bb84e5a /video/out | |
parent | 60d0cea31084d770f5b2d7f691ae2aea8bc05812 (diff) | |
download | mpv-b93bcce5df343de20096deb6d37e971bfa9a3072.tar.bz2 mpv-b93bcce5df343de20096deb6d37e971bfa9a3072.tar.xz |
vo_opengl: coalesce intra-plane PBOs
Instead of allocating three PBOs and cycling through them, we allocate
one PBO that's three times as large, and cycle through the subregion
offsets.
This results in arguably simpler code and faster initialization.
Especially for 4K textures, initializing PBOs can take quite some time;
with this change it drops from e.g. 180ms -> 110ms. For 1080p, it's more
like 66ms -> 52ms for me.
The alignment to 4096 is not required by the spec, but we do it anyway to
avoid possible subregion dependencies, just for peace of mind.
Diffstat (limited to 'video/out')
-rw-r--r-- | video/out/opengl/utils.c | 29 | ||||
-rw-r--r-- | video/out/opengl/utils.h | 2 |
2 files changed, 17 insertions, 14 deletions
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 3615ff92d1..9e786c6dcc 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -1313,29 +1313,31 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, if (!use_pbo || !gl->MapBufferRange) goto no_pbo; + // We align the buffer size to 4096 to avoid possible subregion + // dependencies. This is not a strict requirement (the spec requires no + // alignment), but a good precaution for performance reasons size_t pix_stride = gl_bytes_per_pixel(format, type); - size_t buffer_size = pix_stride * tex_w * tex_h; + size_t buffer_size = FFALIGN(pix_stride * tex_w * tex_h, 4096); size_t needed_size = pix_stride * w * h; if (buffer_size != pbo->buffer_size) gl_pbo_upload_uninit(pbo); - if (!pbo->buffers[0]) { + if (!pbo->buffer) { pbo->gl = gl; pbo->buffer_size = buffer_size; - gl->GenBuffers(NUM_PBO_BUFFERS, &pbo->buffers[0]); - for (int n = 0; n < NUM_PBO_BUFFERS; n++) { - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[n]); - gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL, - GL_DYNAMIC_COPY); - } + gl->GenBuffers(1, &pbo->buffer); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size, + NULL, GL_DYNAMIC_COPY); } + size_t offset = buffer_size * pbo->index; pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS; - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[pbo->index]); - void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, needed_size, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT); if (!data) goto no_pbo; @@ -1346,7 +1348,7 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, goto no_pbo; } - gl_upload_tex(gl, target, format, type, NULL, pix_stride * w, x, y, w, h); + 
gl_upload_tex(gl, target, format, type, (void *)offset, pix_stride * w, x, y, w, h); gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -1359,7 +1361,8 @@ no_pbo: void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo) { if (pbo->gl) - pbo->gl->DeleteBuffers(NUM_PBO_BUFFERS, &pbo->buffers[0]); + pbo->gl->DeleteBuffers(1, &pbo->buffer); + *pbo = (struct gl_pbo_upload){0}; } diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 92b1005c39..c55670b8d6 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -188,7 +188,7 @@ struct mp_pass_perf gl_timer_measure(struct gl_timer *timer); struct gl_pbo_upload { GL *gl; int index; - GLuint buffers[NUM_PBO_BUFFERS]; + GLuint buffer; size_t buffer_size; }; |