summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Niklas Haas <git@haasn.xyz> 2017-07-16 17:17:39 +0200
committer Niklas Haas <git@haasn.xyz> 2017-07-16 17:46:24 +0200
commit dead206873c1840a21adf789b9e4b5d167012b19 (patch)
tree 5de62f71ae89cd902019a315b786c239dd09327b
parent 46bfa3726f6163cb812425b49a8483c635b6d623 (diff)
download mpv-dead206873c1840a21adf789b9e4b5d167012b19.tar.bz2
mpv-dead206873c1840a21adf789b9e4b5d167012b19.tar.xz
vo_opengl: use glBufferSubData instead of glMapBufferRange
Performance seems pretty much unchanged but I no longer get nasty spikes on NUMA systems, probably because glBufferSubData runs in the driver or something.

As a simplification of the code, we also just size the PBO to always have the full size, even for cropped textures. This seems slower but not by relevant amounts, and only affects e.g. --vf=crop. It also slightly increases VRAM usage for textures with big strides.

This new code path is especially nice because it no longer depends on GL_ARB_map_buffer_range, and no longer uses any functions that can possibly fail, thus simplifying control flow and seemingly deprecating the manpage's claim about possible image corruption.

In theory we could also reduce NUM_PBO_BUFFERS since it doesn't seem like we're streaming uploads anyway, but leave it in there just in case some drivers disagree...
-rw-r--r-- DOCS/man/options.rst 3
-rw-r--r-- video/out/opengl/common.c 1
-rw-r--r-- video/out/opengl/common.h 1
-rw-r--r-- video/out/opengl/utils.c 32
4 files changed, 10 insertions, 27 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 07f961af83..5c25794588 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4136,9 +4136,6 @@ The following video options are currently all specific to ``--vo=opengl`` and
source video size is huge (e.g. so called "4K" video). On other drivers it
might be slower or cause latency issues.
- In theory, this can sometimes lead to sporadic and temporary image
- corruption (because reupload is not retried when it fails).
-
``--dither-depth=<N|no|auto>``
Set dither target depth to N. Default: no.
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 6913b77433..203c14b7ef 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -95,6 +95,7 @@ static const struct gl_functions gl_functions[] = {
DEF_FN(BindTexture),
DEF_FN(BlendFuncSeparate),
DEF_FN(BufferData),
+ DEF_FN(BufferSubData),
DEF_FN(Clear),
DEF_FN(ClearColor),
DEF_FN(CompileShader),
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index 351624051f..c9162f2479 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -128,6 +128,7 @@ struct GL {
GLbitfield);
GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum);
void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum);
+ void (GLAPIENTRY *BufferSubData)(GLenum, GLintptr, GLsizeiptr, const GLvoid *);
void (GLAPIENTRY *ActiveTexture)(GLenum);
void (GLAPIENTRY *BindTexture)(GLenum, GLuint);
int (GLAPIENTRY *SwapInterval)(int);
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 9870936bc5..878c468175 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -1310,15 +1310,16 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
assert(x + w <= tex_w && y + h <= tex_h);
- if (!use_pbo || !gl->MapBufferRange)
- goto no_pbo;
+ if (!use_pbo) {
+ gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
+ return;
+ }
// We align the buffer size to 4096 to avoid possible subregion
// dependencies. This is not a strict requirement (the spec requires no
// alignment), but a good precaution for performance reasons
- size_t pix_stride = gl_bytes_per_pixel(format, type);
- size_t buffer_size = FFALIGN(pix_stride * tex_w * tex_h, 4096);
- size_t needed_size = pix_stride * w * h;
+ size_t needed_size = stride * h;
+ size_t buffer_size = FFALIGN(needed_size, 4096);
if (buffer_size != pbo->buffer_size)
gl_pbo_upload_uninit(pbo);
@@ -1345,26 +1346,9 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
- void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, offset, needed_size,
- GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);
- if (!data)
- goto no_pbo;
-
- memcpy_pic(data, dataptr, pix_stride * w, h, pix_stride * w, stride);
-
- if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) {
- gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- goto no_pbo;
- }
-
- gl_upload_tex(gl, target, format, type, (void *)offset, pix_stride * w, x, y, w, h);
-
+ gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
+ gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- return;
-
-no_pbo:
- gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
}
void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)