summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-07-15 22:11:48 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-15 22:11:48 +0200
commitb93bcce5df343de20096deb6d37e971bfa9a3072 (patch)
tree2dc9ab0730371dc4366751742f3228290bb84e5a
parent60d0cea31084d770f5b2d7f691ae2aea8bc05812 (diff)
downloadmpv-b93bcce5df343de20096deb6d37e971bfa9a3072.tar.bz2
mpv-b93bcce5df343de20096deb6d37e971bfa9a3072.tar.xz
vo_opengl: coalesce intra-plane PBOs
Instead of allocating three PBOs and cycling through them, we allocate one PBO that's three times as large, and cycle through the subregion offsets. This results in arguably simpler code and faster initialization. Especially for 4K textures, initializing PBOs can take quite some time (e.g. 180ms -> 110ms with this change). For 1080p, it's more like 66ms -> 52ms for me. The alignment to 4096 is completely unnecessary by spec, but we do it anyway just for peace of mind.
-rw-r--r--video/out/opengl/utils.c29
-rw-r--r--video/out/opengl/utils.h2
2 files changed, 17 insertions, 14 deletions
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 3615ff92d1..9e786c6dcc 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -1313,29 +1313,31 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
if (!use_pbo || !gl->MapBufferRange)
goto no_pbo;
+ // We align the buffer size to 4096 to avoid possible subregion
+ // dependencies. This is not a strict requirement (the spec requires no
+ // alignment), but a good precaution for performance reasons
size_t pix_stride = gl_bytes_per_pixel(format, type);
- size_t buffer_size = pix_stride * tex_w * tex_h;
+ size_t buffer_size = FFALIGN(pix_stride * tex_w * tex_h, 4096);
size_t needed_size = pix_stride * w * h;
if (buffer_size != pbo->buffer_size)
gl_pbo_upload_uninit(pbo);
- if (!pbo->buffers[0]) {
+ if (!pbo->buffer) {
pbo->gl = gl;
pbo->buffer_size = buffer_size;
- gl->GenBuffers(NUM_PBO_BUFFERS, &pbo->buffers[0]);
- for (int n = 0; n < NUM_PBO_BUFFERS; n++) {
- gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[n]);
- gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL,
- GL_DYNAMIC_COPY);
- }
+ gl->GenBuffers(1, &pbo->buffer);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+ gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
+ NULL, GL_DYNAMIC_COPY);
}
+ size_t offset = buffer_size * pbo->index;
pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
- gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[pbo->index]);
- void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, needed_size,
- GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+ void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, offset, needed_size,
+ GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);
if (!data)
goto no_pbo;
@@ -1346,7 +1348,7 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
goto no_pbo;
}
- gl_upload_tex(gl, target, format, type, NULL, pix_stride * w, x, y, w, h);
+ gl_upload_tex(gl, target, format, type, (void *)offset, pix_stride * w, x, y, w, h);
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
@@ -1359,7 +1361,8 @@ no_pbo:
void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)
{
if (pbo->gl)
- pbo->gl->DeleteBuffers(NUM_PBO_BUFFERS, &pbo->buffers[0]);
+ pbo->gl->DeleteBuffers(1, &pbo->buffer);
+
*pbo = (struct gl_pbo_upload){0};
}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 92b1005c39..c55670b8d6 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -188,7 +188,7 @@ struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
struct gl_pbo_upload {
GL *gl;
int index;
- GLuint buffers[NUM_PBO_BUFFERS];
+ GLuint buffer;
size_t buffer_size;
};