vo_opengl: refactor RA texture and buffer updates

- tex_upload args are moved to a struct
- the ability to directly upload texture data without going through a buffer is made explicit
- the concept of buffer updates and buffer polling is made more explicit and generalized to buf_update as well (not just mapped buffers)
- the ability to call tex_upload/buf_update on a tex/buf is made explicit during tex/buf creation
- uploading from buffers now uses an explicit offset instead of implicitly comparing *src against buf->data, because not all buffers may actually be persistently mapped
- the initial_data = immutable requirement is dropped. (May be re-added later for D3D11 if that ever becomes a thing)

This change helps the vulkan abstraction immensely and also helps move common code (like the PBO pooling) out of ra_gl and into the opengl/utils.c

This also technically has the side-benefit / side-constraint of using PBOs for OSD texture uploads as well, which actually seems to help performance on machines where --opengl-pbo is faster than the naive code path. Because of this, I decided to hook up the OSD code to the opengl-pbo option as well.

One drawback of this refactor is that the GL_STREAM_COPY hack for texture uploads "got lost", but I think I'm happy with that going away anyway since DR almost fully deprecates it, and it's not the "right thing" anyway - but instead an nvidia-only hack to make this stuff work somewhat better on NUMA systems with discrete GPUs.

Another change is that due to the way fencing works with ra_buf (we get one fence per ra_buf per upload) we have to use multiple ra_bufs instead of offsets into a shared buffer. But for OpenGL this is probably better anyway. It's possible that in the future, we could support having independent “buffer slices” (each with their own fence/sync object), but this would be an optimization more than anything. I also think that we could address the underlying problem (memory closeness) differently by making the ra_vk memory allocator smart enough to chunk together allocations under the hood.
author: Niklas Haas <git@haasn.xyz> 2017-08-16 22:13:51 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-08-18 00:34:34 +0200
commit: 46d86da6300ebcd2134996c76b9238fcf8e0fb6e (patch)
tree: a9ac0b463aba5ea47112b5e93ec6570f7b37fbe2 /video/out/opengl/gl_utils.c
parent: 9ca5a2a5d839476d8a597fcc124cce41279928bc (diff)
download: mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.bz2
mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.xz
1 files changed, 0 insertions, 66 deletions
diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c
index 6c0537febc..9ec9d5d37d 100644
--- a/video/out/opengl/gl_utils.c
+++ b/video/out/opengl/gl_utils.c
@@ -269,72 +269,6 @@ void gl_set_debug_logger(GL *gl, struct mp_log *log)
         gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log);
 }
 
-// Upload a texture, going through a PBO. PBO supposedly can facilitate
-// asynchronous copy from CPU to GPU, so this is an optimization. Note that
-// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can
-// ruin performance.
-// This call is like gl_upload_tex(), plus PBO management/use.
-// target, format, type, dataptr, stride, x, y, w, h: texture upload params
-//                                                    (see gl_upload_tex())
-// tex_w, tex_h: maximum size of the used texture
-// use_pbo: for convenience, if false redirects the call to gl_upload_tex
-void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
-                       GLenum target, GLenum format, GLenum type,
-                       int tex_w, int tex_h, const void *dataptr, int stride,
-                       int x, int y, int w, int h)
-{
-    assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
-    assert(x + w <= tex_w && y + h <= tex_h);
-
-    if (!use_pbo) {
-        gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
-        return;
-    }
-
-    // We align the buffer size to 4096 to avoid possible subregion
-    // dependencies. This is not a strict requirement (the spec requires no
-    // alignment), but a good precaution for performance reasons
-    size_t needed_size = stride * h;
-    size_t buffer_size = MP_ALIGN_UP(needed_size, 4096);
-
-    if (buffer_size != pbo->buffer_size)
-        gl_pbo_upload_uninit(pbo);
-
-    if (!pbo->buffer) {
-        pbo->gl = gl;
-        pbo->buffer_size = buffer_size;
-        gl->GenBuffers(1, &pbo->buffer);
-        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
-        // Magic time: Because we memcpy once from RAM to the buffer, and then
-        // the GPU needs to read from this anyway, we actually *don't* want
-        // this buffer to be allocated in RAM. If we allocate it in VRAM
-        // instead, we can reduce this to a single copy: from RAM into VRAM.
-        // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
-        // allocated on host memory instead of device memory, so we lie about
-        // the usage to fool the driver into giving us a buffer in VRAM instead
-        // of RAM, which can be significantly faster for our use case.
-        // Seriously, fuck OpenGL.
-        gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
-                       NULL, GL_STREAM_COPY);
-    }
-
-    uintptr_t offset = buffer_size * pbo->index;
-    pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
-
-    gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
-    gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
-    gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
-    gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
-
-void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)
-{
-    if (pbo->gl)
-        pbo->gl->DeleteBuffers(1, &pbo->buffer);
-
-    *pbo = (struct gl_pbo_upload){0};
-}
-
 int gl_get_fb_depth(GL *gl, int fbo)
 {
     if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB))
author	Niklas Haas <git@haasn.xyz>	2017-08-16 22:13:51 +0200
committer	Niklas Haas <git@haasn.xyz>	2017-08-18 00:34:34 +0200
commit	46d86da6300ebcd2134996c76b9238fcf8e0fb6e (patch)
tree	a9ac0b463aba5ea47112b5e93ec6570f7b37fbe2 /video/out/opengl/gl_utils.c
parent	9ca5a2a5d839476d8a597fcc124cce41279928bc (diff)
download	mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.bz2 mpv-46d86da6300ebcd2134996c76b9238fcf8e0fb6e.tar.xz