summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2017-08-04 19:09:46 +0200
committerwm4 <wm4@nowhere>2017-08-05 13:09:05 +0200
commitaac04c0d6496d8847499a94376e85f1711bf31d6 (patch)
treebf77bcd2d12729a3706804b29ba5951d173a3226
parentfa4a1c46759136334646e47c627ddb75e532a658 (diff)
downloadmpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.bz2
mpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.xz
vo_opengl: split utils.c/h
Actually GL-specific parts go into gl_utils.c/h, the shader cache (gl_sc*) into shader_cache.c/h. No semantic changes of any kind, except that the VAO helper is made public again as part of gl_utils.c (all while the goal for gl_utils.c itself is to be included by GL-specific code).
-rw-r--r--video/out/opengl/gl_utils.c502
-rw-r--r--video/out/opengl/gl_utils.h94
-rw-r--r--video/out/opengl/hwdec_vdpau.c2
-rw-r--r--video/out/opengl/osd.c1
-rw-r--r--video/out/opengl/osd.h2
-rw-r--r--video/out/opengl/ra_gl.h2
-rw-r--r--video/out/opengl/shader_cache.c952
-rw-r--r--video/out/opengl/shader_cache.h51
-rw-r--r--video/out/opengl/user_shaders.h1
-rw-r--r--video/out/opengl/utils.c1489
-rw-r--r--video/out/opengl/utils.h149
-rw-r--r--video/out/opengl/video.h2
-rw-r--r--wscript_build.py2
13 files changed, 1647 insertions, 1602 deletions
diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c
new file mode 100644
index 0000000000..c870756b1e
--- /dev/null
+++ b/video/out/opengl/gl_utils.c
@@ -0,0 +1,502 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "formats.h"
+#include "ra_gl.h"
+#include "gl_utils.h"
+
+// Translate a glGetError() code into a short symbolic name for log output.
+// GLU provides gluErrorString for this, but GLU is legacy-OpenGL and is not
+// used here. Codes not listed below yield "unknown".
+static const char *gl_error_to_string(GLenum error)
+{
+    if (error == GL_INVALID_ENUM)
+        return "INVALID_ENUM";
+    if (error == GL_INVALID_VALUE)
+        return "INVALID_VALUE";
+    if (error == GL_INVALID_OPERATION)
+        return "INVALID_OPERATION";
+    if (error == GL_INVALID_FRAMEBUFFER_OPERATION)
+        return "INVALID_FRAMEBUFFER_OPERATION";
+    if (error == GL_OUT_OF_MEMORY)
+        return "OUT_OF_MEMORY";
+    return "unknown";
+}
+
+void gl_check_error(GL *gl, struct mp_log *log, const char *info)
+{
+ for (;;) {
+ GLenum error = gl->GetError();
+ if (error == GL_NO_ERROR)
+ break;
+ mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info,
+ gl_error_to_string(error));
+ }
+}
+
+// Return the largest value out of 8, 4, 2 that evenly divides stride, or 1 if
+// none of them does. The result is used as GL_UNPACK_ALIGNMENT.
+static int get_alignment(int stride)
+{
+    for (int align = 8; align > 1; align /= 2) {
+        if (stride % align == 0)
+            return align;
+    }
+    return 1;
+}
+
+// Upload image data with glTexSubImage2D, handling stride and (if needed)
+// row-by-row slicing.
+// target: texture target, usually GL_TEXTURE_2D
+// format, type: texture parameters
+// dataptr, stride: image data and its line size in bytes (may be negative)
+// x, y, w, h: part of the texture to update
+// Does nothing if w or h is not positive, or if gl_bytes_per_pixel() returns
+// 0 for format/type. On return GL_UNPACK_ALIGNMENT is set to 4 and, if
+// MPGL_CAP_ROW_LENGTH is available, GL_UNPACK_ROW_LENGTH to 0.
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+ const void *dataptr, int stride,
+ int x, int y, int w, int h)
+{
+ int bpp = gl_bytes_per_pixel(format, type);
+ const uint8_t *data = dataptr;
+ int y_max = y + h;
+ if (w <= 0 || h <= 0 || !bpp)
+ return;
+ // Negative stride: move the data pointer by (h - 1) rows of the (negative)
+ // stride and continue with the positive stride value.
+ if (stride < 0) {
+ data += (h - 1) * stride;
+ stride = -stride;
+ }
+ gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride));
+ // Number of rows passed to each TexSubImage2D call.
+ int slice = h;
+ if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) {
+ // this is not always correct, but should work for MPlayer
+ gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp);
+ } else {
+ // Without row length support, padded lines can only be uploaded one
+ // row at a time.
+ if (stride != bpp * w)
+ slice = 1; // very inefficient, but at least it works
+ }
+ for (; y + slice <= y_max; y += slice) {
+ gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data);
+ data += stride * slice;
+ }
+ // Upload any remaining rows that did not fill a whole slice.
+ if (y < y_max)
+ gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
+ if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH)
+ gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+ gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
+}
+
+// Read back the contents of a framebuffer into a newly allocated RGB24 image.
+// fbo: framebuffer object to read from; 0 reads from GL_FRONT
+// w, h: size of the region to read, starting at (0, 0)
+// Returns NULL on GLES or if the image allocation fails. Before returning an
+// image, the framebuffer binding is reset to 0 and GL_PACK_ALIGNMENT to 4.
+mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h)
+{
+ if (gl->es)
+ return NULL; // ES can't read from front buffer
+ mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h);
+ if (!image)
+ return NULL;
+ gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+ GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT;
+ gl->PixelStorei(GL_PACK_ALIGNMENT, 1);
+ gl->ReadBuffer(obj);
+ //flip image while reading (and also avoid stride-related trouble)
+ // GL row (h - y - 1) is stored as image row y, one row per ReadPixels call.
+ for (int y = 0; y < h; y++) {
+ gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE,
+ image->planes[0] + y * image->stride[0]);
+ }
+ gl->PixelStorei(GL_PACK_ALIGNMENT, 4);
+ gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+ return image;
+}
+
+// Print a multi-line string with line numbers (e.g. shader sources).
+// log, lev: module and log level, as in mp_msg()
+// src: NUL-terminated text; NULL is accepted and logs nothing
+// Each line is logged as "[NNN] text"; lines are separated by '\n', and a
+// final line without trailing newline is logged as well.
+void mp_log_source(struct mp_log *log, int lev, const char *src)
+{
+    if (!src)
+        return;
+    for (int line = 1; *src; line++) {
+        const char *end = strchr(src, '\n');
+        const char *next;
+        // Derive "next" only once end is known to be non-NULL: computing
+        // end + 1 from a NULL pointer is undefined behavior.
+        if (end) {
+            next = end + 1;
+        } else {
+            next = end = src + strlen(src);
+        }
+        mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src);
+        src = next;
+    }
+}
+
+// Enable and describe one vertex attribute array per entry of vao->entries.
+// The entry list ends at the first entry whose name is NULL; the attribute
+// index is the position of the entry in the list.
+static void gl_vao_enable_attribs(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+    const struct gl_vao_entry *entry = vao->entries;
+
+    for (int idx = 0; entry->name; idx++, entry++) {
+        gl->EnableVertexAttribArray(idx);
+        gl->VertexAttribPointer(idx, entry->num_elems, entry->type,
+                                entry->normalized, vao->stride,
+                                (void *)(intptr_t)entry->offset);
+    }
+}
+
+// Initialize *vao for drawing vertex data with the given layout.
+// vao: must not hold a VAO or buffer yet (its vao and buffer fields must be 0)
+// stride: size of one vertex, passed on to glVertexAttribPointer
+// entries: attribute list, terminated by an entry with a NULL name; only the
+// pointer is stored, the array is not copied
+// A vertex buffer is always created. A VAO object is created only if the
+// driver provides BindVertexArray.
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+ const struct gl_vao_entry *entries)
+{
+ assert(!vao->vao);
+ assert(!vao->buffer);
+
+ *vao = (struct gl_vao){
+ .gl = gl,
+ .stride = stride,
+ .entries = entries,
+ };
+
+ gl->GenBuffers(1, &vao->buffer);
+
+ if (gl->BindVertexArray) {
+ // Record the attribute setup in the VAO while the vertex buffer is
+ // bound, then restore both bindings to 0.
+ gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+
+ gl->GenVertexArrays(1, &vao->vao);
+ gl->BindVertexArray(vao->vao);
+ gl_vao_enable_attribs(vao);
+ gl->BindVertexArray(0);
+
+ gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+ }
+}
+
+// Release the GL objects owned by *vao and zero the struct. A vao whose gl
+// field is NULL (never initialized) is left untouched.
+void gl_vao_uninit(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl) {
+        // VAO objects exist only if the driver exposes the VAO functions.
+        if (gl->DeleteVertexArrays)
+            gl->DeleteVertexArrays(1, &vao->vao);
+        gl->DeleteBuffers(1, &vao->buffer);
+
+        *vao = (struct gl_vao){0};
+    }
+}
+
+// Make the vertex layout of vao current: bind the VAO object if the driver
+// supports it, otherwise set up the attribute arrays manually while the
+// vertex buffer is temporarily bound.
+static void gl_vao_bind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl->BindVertexArray) {
+        gl->BindVertexArray(vao->vao);
+        return;
+    }
+
+    gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+    gl_vao_enable_attribs(vao);
+    gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+}
+
+// Undo gl_vao_bind(): unbind the VAO object if the driver supports it,
+// otherwise disable every attribute array described by vao->entries.
+static void gl_vao_unbind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl->BindVertexArray) {
+        gl->BindVertexArray(0);
+        return;
+    }
+
+    int idx = 0;
+    while (vao->entries[idx].name) {
+        gl->DisableVertexAttribArray(idx);
+        idx++;
+    }
+}
+
+// Draw the vertex data (as described by the gl_vao_entry entries) in ptr
+// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES.
+// If ptr is NULL, then skip the upload, and use the data uploaded with the
+// previous call.
+// When ptr is set, num * vao->stride bytes are uploaded into the vertex
+// buffer with GL_STREAM_DRAW usage. The GL_ARRAY_BUFFER binding is 0 and the
+// VAO is unbound again on return.
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num)
+{
+ GL *gl = vao->gl;
+
+ if (ptr) {
+ gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+ gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW);
+ gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+ }
+
+ gl_vao_bind(vao);
+
+ gl->DrawArrays(prim, 0, num);
+
+ gl_vao_unbind(vao);
+}
+
+// GL debug message callback. userParam is the struct mp_log registered via
+// gl_set_debug_logger(); the GL severity selects the mpv log level, and each
+// message is logged with a "GL: " prefix.
+static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
+                                   GLenum severity, GLsizei length,
+                                   const GLchar *message, const void *userParam)
+{
+    // keep in mind that the debug callback can be asynchronous
+    struct mp_log *log = (void *)userParam;
+
+    // MSGL_ERR covers GL_DEBUG_SEVERITY_HIGH and any unrecognized severity.
+    int level = MSGL_ERR;
+    if (severity == GL_DEBUG_SEVERITY_NOTIFICATION) {
+        level = MSGL_V;
+    } else if (severity == GL_DEBUG_SEVERITY_LOW) {
+        level = MSGL_INFO;
+    } else if (severity == GL_DEBUG_SEVERITY_MEDIUM) {
+        level = MSGL_WARN;
+    }
+
+    mp_msg(log, level, "GL: %s\n", message);
+}
+
+// Route GL debug messages to log, or remove the callback if log is NULL.
+// Does nothing if the driver lacks DebugMessageCallback.
+void gl_set_debug_logger(GL *gl, struct mp_log *log)
+{
+    if (!gl->DebugMessageCallback)
+        return;
+
+    if (log) {
+        gl->DebugMessageCallback(gl_debug_cb, log);
+    } else {
+        gl->DebugMessageCallback(NULL, log);
+    }
+}
+
+// Maximum number of simultaneous query objects to keep around. Reducing this
+// number might cause rendering to block until the result of a previous query is
+// available
+#define QUERY_OBJECT_NUM 8
+
+// State of one GPU timer: a ring of GL query objects plus a ring buffer of
+// the most recently recorded results.
+struct gl_timer {
+ GL *gl;
+ GLuint query[QUERY_OBJECT_NUM];
+ int query_idx; // index of the query object the next gl_timer_start() uses
+
+ // these numbers are all in nanoseconds
+ uint64_t samples[PERF_SAMPLE_COUNT];
+ int sample_idx; // slot the next recorded sample is written to
+ int sample_count; // number of recorded samples, capped at PERF_SAMPLE_COUNT
+
+ uint64_t avg_sum; // sum of the samples currently held in the ring buffer
+ uint64_t peak; // largest sample currently held in the ring buffer
+};
+
+// Return a snapshot of the statistics collected by timer.
+// The result holds the number of valid samples, the ring buffer index of the
+// oldest one, the peak, the most recently recorded sample, and the average
+// (left at 0 if no sample was recorded yet). res.samples points into the
+// timer's own sample buffer, not a copy.
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
+{
+    assert(timer);
+
+    // Bias by PERF_SAMPLE_COUNT before taking the modulo. sample_idx is below
+    // PERF_SAMPLE_COUNT and sample_count is at most PERF_SAMPLE_COUNT, so the
+    // unbiased differences can be negative; a negative operand would give a
+    // negative remainder and thus an out-of-bounds index.
+    int oldest = (timer->sample_idx - timer->sample_count + PERF_SAMPLE_COUNT)
+                 % PERF_SAMPLE_COUNT;
+    int newest = (timer->sample_idx - 1 + PERF_SAMPLE_COUNT)
+                 % PERF_SAMPLE_COUNT;
+
+    struct mp_pass_perf res = {
+        .count = timer->sample_count,
+        .index = oldest,
+        .peak = timer->peak,
+        .samples = timer->samples,
+    };
+
+    res.last = timer->samples[newest];
+
+    if (timer->sample_count > 0) {
+        res.avg = timer->avg_sum / timer->sample_count;
+    }
+
+    return res;
+}
+
+// Allocate a zero-initialized timer bound to gl. Query objects are generated
+// only if the driver provides GenQueries. Release with gl_timer_free().
+struct gl_timer *gl_timer_create(GL *gl)
+{
+    struct gl_timer *timer = talloc_ptrtype(NULL, timer);
+    *timer = (struct gl_timer){0};
+    timer->gl = gl;
+
+    if (gl->GenQueries)
+        gl->GenQueries(QUERY_OBJECT_NUM, timer->query);
+
+    return timer;
+}
+
+// Delete the timer's query objects (if the driver supports queries) and free
+// the timer itself. Passing NULL is allowed and does nothing.
+void gl_timer_free(struct gl_timer *timer)
+{
+    if (timer) {
+        GL *gl = timer->gl;
+
+        // this is a no-op on already uninitialized queries
+        if (gl && gl->DeleteQueries)
+            gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query);
+
+        talloc_free(timer);
+    }
+}
+
+// Store a new sample in the ring buffer, replacing the oldest slot, and keep
+// the running sum, the sample count and the peak consistent with the buffer.
+static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
+{
+ // Input res into the buffer and grab the previous value
+ uint64_t old = timer->samples[timer->sample_idx];
+ timer->samples[timer->sample_idx++] = new;
+ timer->sample_idx %= PERF_SAMPLE_COUNT;
+
+ // Update average and sum
+ // (the overwritten value leaves the sum, the new one enters it)
+ timer->avg_sum = timer->avg_sum + new - old;
+ timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
+
+ // Update peak if necessary
+ if (new >= timer->peak) {
+ timer->peak = new;
+ } else if (timer->peak == old) {
+ // It's possible that the last peak was the value we just removed,
+ // if so we need to scan for the new peak
+ uint64_t peak = new;
+ for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
+ peak = MPMAX(peak, timer->samples[i]);
+ timer->peak = peak;
+ }
+}
+
+// Begin a GL_TIME_ELAPSED query on the timer's next query object. Does
+// nothing if the driver lacks BeginQuery.
+// If no free query is available, this can block. Shouldn't ever happen in
+// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM)
+// IMPORTANT: only one gl_timer object may ever be active at a single time.
+// The calling code *MUST* ensure this
+void gl_timer_start(struct gl_timer *timer)
+{
+ assert(timer);
+ GL *gl = timer->gl;
+ if (!gl->BeginQuery)
+ return;
+
+ // Get the next query object
+ GLuint id = timer->query[timer->query_idx++];
+ timer->query_idx %= QUERY_OBJECT_NUM;
+
+ // If this query object already holds a result, we need to get and
+ // record it first
+ if (gl->IsQuery(id)) {
+ GLuint64 elapsed;
+ gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed);
+ gl_timer_record(timer, elapsed);
+ }
+
+ gl->BeginQuery(GL_TIME_ELAPSED, id);
+}
+
+// End the currently running GL_TIME_ELAPSED query, if the driver supports
+// queries at all.
+void gl_timer_stop(GL *gl)
+{
+    if (!gl->EndQuery)
+        return;
+
+    gl->EndQuery(GL_TIME_ELAPSED);
+}
+
+// Upload a texture, going through a PBO. PBO supposedly can facilitate
+// asynchronous copy from CPU to GPU, so this is an optimization. Note that
+// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can
+// ruin performance.
+// This call is like gl_upload_tex(), plus PBO management/use.
+// target, format, type, dataptr, stride, x, y, w, h: texture upload params
+// (see gl_upload_tex())
+// tex_w, tex_h: maximum size of the used texture
+// use_pbo: for convenience, if false redirects the call to gl_upload_tex
+// pbo: upload state; its buffer is (re)created here whenever the aligned
+// upload size differs from the size it was created with
+void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
+ GLenum target, GLenum format, GLenum type,
+ int tex_w, int tex_h, const void *dataptr, int stride,
+ int x, int y, int w, int h)
+{
+ assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
+ assert(x + w <= tex_w && y + h <= tex_h);
+
+ if (!use_pbo) {
+ gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
+ return;
+ }
+
+ // We align the buffer size to 4096 to avoid possible subregion
+ // dependencies. This is not a strict requirement (the spec requires no
+ // alignment), but a good precaution for performance reasons
+ // NOTE(review): stride * h is computed in int and then converted to size_t;
+ // a negative stride (which gl_upload_tex() accepts) would yield a huge
+ // size here - presumably callers only pass positive strides; confirm.
+ size_t needed_size = stride * h;
+ size_t buffer_size = MP_ALIGN_UP(needed_size, 4096);
+
+ if (buffer_size != pbo->buffer_size)
+ gl_pbo_upload_uninit(pbo);
+
+ if (!pbo->buffer) {
+ pbo->gl = gl;
+ pbo->buffer_size = buffer_size;
+ gl->GenBuffers(1, &pbo->buffer);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+ // Magic time: Because we memcpy once from RAM to the buffer, and then
+ // the GPU needs to read from this anyway, we actually *don't* want
+ // this buffer to be allocated in RAM. If we allocate it in VRAM
+ // instead, we can reduce this to a single copy: from RAM into VRAM.
+ // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
+ // allocated on host memory instead of device memory, so we lie about
+ // the usage to fool the driver into giving us a buffer in VRAM instead
+ // of RAM, which can be significantly faster for our use case.
+ // Seriously, fuck OpenGL.
+ gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
+ NULL, GL_STREAM_COPY);
+ }
+
+ // The buffer holds NUM_PBO_BUFFERS regions of buffer_size bytes each; use
+ // the current one and advance the index for the next call.
+ uintptr_t offset = buffer_size * pbo->index;
+ pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
+
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+ gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
+ // With a pixel unpack buffer bound, the data "pointer" passed on is the
+ // byte offset into that buffer.
+ gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+// Delete the PBO (if one was ever created through this struct) and reset
+// *pbo to all zeros, so it can be reused by gl_pbo_upload_tex().
+void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)
+{
+    GL *gl = pbo->gl;
+
+    if (gl)
+        gl->DeleteBuffers(1, &pbo->buffer);
+
+    *pbo = (struct gl_pbo_upload){0};
+}
+
+// The intention is to return the actual depth of any fixed point 16 bit
+// textures. (Actually tests only 1 format - hope that is good enough.)
+// Returns the queried component size in bits, 16 if the size cannot be
+// queried but MPGL_CAP_EXT16 is set, or -1 if it cannot be determined.
+int gl_determine_16bit_tex_depth(GL *gl)
+{
+ const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1);
+ if (!gl->GetTexLevelParameteriv || !fmt) {
+ // ANGLE supports ES 3.0 and the extension, but lacks the function above.
+ if (gl->mpgl_caps & MPGL_CAP_EXT16)
+ return 16;
+ return -1;
+ }
+
+ // Create a temporary 64x64 texture of that format and ask the driver for
+ // the size of its (red or luminance) component.
+ GLuint tex;
+ gl->GenTextures(1, &tex);
+ gl->BindTexture(GL_TEXTURE_2D, tex);
+ gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, 64, 64, 0,
+ fmt->format, fmt->type, NULL);
+ GLenum pname = 0;
+ switch (fmt->format) {
+ case GL_RED: pname = GL_TEXTURE_RED_SIZE; break;
+ case GL_LUMINANCE: pname = GL_TEXTURE_LUMINANCE_SIZE; break;
+ }
+ // param stays -1 if the format is neither GL_RED nor GL_LUMINANCE.
+ GLint param = -1;
+ if (pname)
+ gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pname, &param);
+ gl->DeleteTextures(1, &tex);
+ return param;
+}
+
+// Return the size in bits of the green component of a framebuffer's color
+// buffer, or -1 if it cannot be queried or is reported as not positive.
+// fbo: framebuffer object to query; 0 selects the back buffer
+// The framebuffer binding is reset to 0 after the query.
+int gl_get_fb_depth(GL *gl, int fbo)
+{
+ // Needs desktop GL or at least GLES 3.0, plus framebuffer support.
+ if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB))
+ return -1;
+
+ gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+ // Attachment name: GL_BACK_LEFT on desktop GL (gl->version set), GL_BACK
+ // otherwise, and GL_COLOR_ATTACHMENT0 for a non-zero fbo.
+ GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK;
+ if (fbo)
+ obj = GL_COLOR_ATTACHMENT0;
+
+ GLint depth_g = -1;
+
+ gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj,
+ GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g);
+
+ gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+ return depth_g > 0 ? depth_g : -1;
+}
diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h
new file mode 100644
index 0000000000..5ae8d1590b
--- /dev/null
+++ b/video/out/opengl/gl_utils.h
@@ -0,0 +1,94 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_UTILS_
+#define MP_GL_UTILS_
+
+#include <math.h>
+
+#include "common.h"
+#include "ra.h"
+
+struct mp_log;
+
+void gl_check_error(GL *gl, struct mp_log *log, const char *info);
+
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+ const void *dataptr, int stride,
+ int x, int y, int w, int h);
+
+mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h);
+
+const char* mp_sampler_type(GLenum texture_target);
+
+// print a multi line string with line numbers (e.g. for shader sources)
+// log, lev: module and log level, as in mp_msg()
+void mp_log_source(struct mp_log *log, int lev, const char *src);
+
+// Describes one vertex attribute. A list of entries passed to gl_vao_init()
+// is terminated by an entry whose name is NULL.
+struct gl_vao_entry {
+ // used for shader / glBindAttribLocation
+ const char *name;
+ // glVertexAttribPointer() arguments
+ int num_elems; // size (number of elements)
+ GLenum type;
+ bool normalized;
+ int offset; // passed (cast to a pointer) as the "pointer" argument
+};
+
+// Vertex buffer plus attribute layout, set up by gl_vao_init() and released
+// by gl_vao_uninit(). Must be zero-initialized before gl_vao_init().
+struct gl_vao {
+ GL *gl;
+ GLuint vao; // the VAO object, or 0 if unsupported by driver
+ GLuint buffer; // GL_ARRAY_BUFFER used for the data
+ int stride; // size of each element (interleaved elements are assumed)
+ const struct gl_vao_entry *entries; // not copied; terminated by NULL name
+};
+
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+ const struct gl_vao_entry *entries);
+void gl_vao_uninit(struct gl_vao *vao);
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
+
+void gl_set_debug_logger(GL *gl, struct mp_log *log);
+
+struct gl_timer;
+
+struct gl_timer *gl_timer_create(GL *gl);
+void gl_timer_free(struct gl_timer *timer);
+void gl_timer_start(struct gl_timer *timer);
+void gl_timer_stop(GL *gl);
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
+
+// Number of equally sized regions a PBO is divided into; uploads cycle
+// through them.
+#define NUM_PBO_BUFFERS 3
+
+// State for gl_pbo_upload_tex(). Zero-initialize before first use; release
+// with gl_pbo_upload_uninit().
+struct gl_pbo_upload {
+ GL *gl; // set once a buffer has been created
+ int index; // region to use for the next upload
+ GLuint buffer; // the PBO, or 0 if none has been created yet
+ size_t buffer_size; // size of a single region, not of the whole buffer
+};
+
+void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
+ GLenum target, GLenum format, GLenum type,
+ int tex_w, int tex_h, const void *dataptr, int stride,
+ int x, int y, int w, int h);
+void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo);
+
+int gl_determine_16bit_tex_depth(GL *gl);
+int gl_get_fb_depth(GL *gl, int fbo);
+
+#endif
diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c
index 712997ed7a..9ddec18e06 100644
--- a/video/out/opengl/hwdec_vdpau.c
+++ b/video/out/opengl/hwdec_vdpau.c
@@ -21,7 +21,7 @@
#include <GL/glx.h>
#include "hwdec.h"
-#include "utils.h"
+#include "gl_utils.h"
#include "video/vdpau.h"
#include "video/vdpau_mixer.h"
diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c
index 18eb36694f..aa0791139d 100644
--- a/video/out/opengl/osd.c
+++ b/video/out/opengl/osd.c
@@ -22,7 +22,6 @@
#include <libavutil/common.h>
#include "formats.h"
-#include "utils.h"
#include "osd.h"
#define GLSL(x) gl_sc_add(sc, #x "\n");
diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h
index a09c891c0b..36926f95f0 100644
--- a/video/out/opengl/osd.h
+++ b/video/out/opengl/osd.h
@@ -5,6 +5,8 @@
#include <inttypes.h>
#include "utils.h"
+#include "gl_utils.h"
+#include "shader_cache.h"
#include "sub/osd.h"
struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd);
diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h
index 23e3199aeb..016ce13419 100644
--- a/video/out/opengl/ra_gl.h
+++ b/video/out/opengl/ra_gl.h
@@ -2,7 +2,7 @@
#include "common.h"
#include "ra.h"
-#include "utils.h"
+#include "gl_utils.h"
// For ra.priv
struct ra_gl {
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
new file mode 100644
index 0000000000..7f8b37be64
--- /dev/null
+++ b/video/out/opengl/shader_cache.c
@@ -0,0 +1,952 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "shader_cache.h"
+#include "formats.h"
+#include "ra_gl.h"
+#include "gl_utils.h"
+
+// Force cache flush if more than this number of shaders is created.
+#define SC_MAX_ENTRIES 48
+
+// Kind of value stored in a union uniform_val.
+// NOTE(review): presumably i = integer, f = float, m = matrix - only the
+// integer case is visibly used in this part of the file; confirm against
+// the uniform setters.
+enum uniform_type {
+ UT_invalid,
+ UT_i,
+ UT_f,
+ UT_m,
+};
+
+// Storage for a uniform's value; which member is valid depends on the
+// uniform's enum uniform_type.
+// NOTE(review): 9 floats presumably cover up to a 3x3 matrix - confirm.
+union uniform_val {
+ GLfloat f[9];
+ GLint i[4];
+};
+
+// A uniform registered for the shader currently being built. The list of
+// these is cleared by gl_sc_reset().
+struct sc_uniform {
+ char *name; // talloc-allocated, freed in gl_sc_reset()
+ enum uniform_type type;
+ const char *glsl_type;
+ int size;
+ GLint loc;
+ union uniform_val v;
+ // Set for sampler uniforms.
+ // (v.i[0] is then the texture unit, see gl_sc_reset())
+ GLenum tex_target;
+ GLuint tex_handle;
+ // Set for image uniforms
+ // (v.i[0] is then the image unit, see gl_sc_reset())
+ GLuint img_handle;
+ GLenum img_access;
+ GLenum img_iformat;
+};
+
+// A shader storage buffer registered for the shader currently being built.
+// The list of these is cleared by gl_sc_reset().
+struct sc_buffer {
+ char *name; // talloc-allocated, freed in gl_sc_reset()
+ char *format; // talloc-allocated, freed in gl_sc_reset()
+ GLuint binding; // GL_SHADER_STORAGE_BUFFER binding point
+ GLuint ssbo;
+};
+
+// Per-shader uniform state kept in a struct sc_entry.
+// NOTE(review): presumably the location and last uploaded value of a
+// uniform, used to skip redundant updates - confirm against the code that
+// fills it.
+struct sc_cached_uniform {
+ GLint loc;
+ union uniform_val v;
+};
+
+// One entry of the shader cache. All resources referenced here are released
+// by sc_flush_cache().
+struct sc_entry {
+ GLuint gl_shader; // GL program object (deleted with DeleteProgram)
+ struct sc_cached_uniform *uniforms;
+ int num_uniforms;
+ // NOTE(review): presumably the fragment/vertex/compute shader texts this
+ // entry was built from - confirm against the lookup code.
+ bstr frag;
+ bstr vert;
+ bstr comp;
+ struct gl_timer *timer;
+ struct gl_vao vao;
+};
+
+// Shader cache and shader builder state. Created by gl_sc_create() and
+// destroyed by gl_sc_destroy(); the per-shader fields are cleared by
+// gl_sc_reset().
+struct gl_shader_cache {
+ GL *gl;
+ struct mp_log *log;
+
+ // permanent
+ // (extension names added by gl_sc_enable_extension(), without duplicates)
+ char **exts;
+ int num_exts;
+
+ // this is modified during use (gl_sc_add() etc.) and reset for each shader
+ bstr prelude_text;
+ bstr header_text;
+ bstr text;
+ int next_texture_unit;
+ int next_image_unit;
+ int next_buffer_binding;
+ struct gl_vao *vao; // deprecated
+
+ // cached shaders, released by sc_flush_cache()
+ struct sc_entry *entries;
+ int num_entries;
+
+ struct sc_entry *current_shader; // set by gl_sc_generate()
+
+ // uniforms and buffers registered for the current shader
+ struct sc_uniform *uniforms;
+ int num_uniforms;
+ struct sc_buffer *buffers;
+ int num_buffers;
+
+ const struct gl_vao_entry *vertex_entries;
+ size_t vertex_size;
+
+ // For checking that the user is calling gl_sc_reset() properly.
+ bool needs_reset;
+
+ bool error_state; // true if an error occurred
+
+ // temporary buffers (avoids frequent reallocations)
+ bstr tmp[5];
+
+ // For the disk-cache.
+ char *cache_dir;
+ struct mpv_global *global; // can be NULL
+};
+
+// Allocate an empty shader cache that uses gl for GL calls and log for
+// messages. The returned object is already reset and ready for building a
+// shader. Destroy it with gl_sc_destroy().
+struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log)
+{
+    struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc);
+    *sc = (struct gl_shader_cache){0};
+    sc->gl = gl;
+    sc->log = log;
+
+    gl_sc_reset(sc);
+    return sc;
+}
+
+// Reset the previous pass and prepare sc for building the next shader.
+// If the pass is marked as needing a reset, first unbind all GL state managed
+// by sc - the current program, texture and image units, and shader storage
+// buffer bindings - and stop the running timer query. Then drop the
+// accumulated shader text, uniforms, buffers and vertex format.
+void gl_sc_reset(struct gl_shader_cache *sc)
+{
+ GL *gl = sc->gl;
+
+ if (sc->needs_reset) {
+ gl_timer_stop(gl);
+ gl->UseProgram(0);
+
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ // Sampler uniform: v.i[0] is the texture unit it was bound to.
+ if (u->type == UT_i && u->tex_target) {
+ gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
+ gl->BindTexture(u->tex_target, 0);
+ }
+ // Image uniform: v.i[0] is the image unit it was bound to.
+ if (u->type == UT_i && u->img_access) {
+ gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0,
+ u->img_access, u->img_iformat);
+ }
+ }
+ gl->ActiveTexture(GL_TEXTURE0);
+
+ for (int n = 0; n < sc->num_buffers; n++) {
+ struct sc_buffer *b = &sc->buffers[n];
+ gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0);
+ }
+ }
+
+ // Only the lengths are reset, so the text buffers keep their allocation.
+ sc->prelude_text.len = 0;
+ sc->header_text.len = 0;
+ sc->text.len = 0;
+ for (int n = 0; n < sc->num_uniforms; n++)
+ talloc_free(sc->uniforms[n].name);
+ sc->num_uniforms = 0;
+ for (int n = 0; n < sc->num_buffers; n++) {
+ talloc_free(sc->buffers[n].name);
+ talloc_free(sc->buffers[n].format);
+ }
+ sc->num_buffers = 0;
+ sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
+ sc->next_image_unit = 1;
+ sc->next_buffer_binding = 1;
+ sc->vertex_entries = NULL;
+ sc->vertex_size = 0;
+ sc->current_shader = NULL;
+ sc->needs_reset = false;
+}
+
+// Drop every cached shader: delete the GL program and free the shader texts,
+// cached uniforms, timer and VAO of each entry, then mark the cache as empty.
+static void sc_flush_cache(struct gl_shader_cache *sc)
+{
+    MP_VERBOSE(sc, "flushing shader cache\n");
+
+    for (int i = 0; i < sc->num_entries; i++) {
+        struct sc_entry *entry = &sc->entries[i];
+
+        sc->gl->DeleteProgram(entry->gl_shader);
+        talloc_free(entry->vert.start);
+        talloc_free(entry->frag.start);
+        talloc_free(entry->comp.start);
+        talloc_free(entry->uniforms);
+        gl_timer_free(entry->timer);
+        gl_vao_uninit(&entry->vao);
+    }
+
+    sc->num_entries = 0;
+}
+
+// Reset the current pass, flush all cached shaders and free sc itself.
+// Passing NULL is allowed and does nothing.
+void gl_sc_destroy(struct gl_shader_cache *sc)
+{
+    if (sc) {
+        gl_sc_reset(sc);
+        sc_flush_cache(sc);
+        talloc_free(sc);
+    }
+}
+
+bool gl_sc_error_state(struct gl_shader_cache *sc)
+{
+ return sc->error_state;
+}
+
+void gl_sc_reset_error(struct gl_shader_cache *sc)
+{
+ sc->error_state = false;
+}
+
+// Add name to the permanent extension list of sc, unless an equal string is
+// already present. The name is copied; the copy is owned by sc.
+void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name)
+{
+    int idx = 0;
+    while (idx < sc->num_exts) {
+        if (!strcmp(sc->exts[idx], name))
+            return; // already enabled
+        idx++;
+    }
+
+    MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name));
+}
+
+#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s))
+
+// Append a NUL-terminated string to the body text of the shader being built.
+void gl_sc_add(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend(sc, &sc->text, bstr0(text));
+}
+
+// printf-style variant of gl_sc_add(): format the arguments and append the
+// result to the body text of the shader being built.
+void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->text, textf, args);
+    va_end(args);
+}
+
+// Append a NUL-terminated string to the header text of the shader being
+// built.
+void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend(sc, &sc->header_text, bstr0(text));
+}
+
+// printf-style variant of gl_sc_hadd(): format the arguments and append the
+// result to the header text of the shader being built.
+void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->header_text, textf, args);
+    va_end(args);
+}
+
+// Append a bstr (which need not be NUL-terminated) to the header text of the
+// shader being built.
+void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
+{
+    bstr *header = &sc->header_text;
+    bstr_xappend(sc, header, text);
+}
+
+// Format the arguments printf-style and append the result to the prelude
+// text of the shader being built.
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, args);
+    va_end(args);
+}
+
+// Look up the uniform called name among those registered for the current
+// shader. If there is none, append a new one (location -1, all other fields
+// zero, name copied) and return that. The returned pointer points into
+// sc->uniforms.
+static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
+                                       const char *name)
+{
+    for (int i = 0; i < sc->num_uniforms; i++) {
+        struct sc_uniform *candidate = &sc->uniforms[i];
+        if (!strcmp(candidate->name, name))
+            return candidate;
+    }
+
+    // not found -> add it
+    struct sc_uniform fresh = {0};
+    fresh.loc = -1;
+    fresh.name = talloc_strdup(NULL, name);
+    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, fresh);
+    return &sc->uniforms[sc->num_uniforms - 1];
+}
+
+// Look up the buffer called name among those registered for the current
+// shader. If there is none, append a new zeroed one with a copy of the name
+// and return that. The returned pointer points into sc->buffers.
+static struct sc_buffer *find_buffer(struct gl_shader_cache *sc,
+                                     const char *name)
+{
+    for (int i = 0; i < sc->num_buffers; i++) {
+        struct sc_buffer *candidate = &sc->buffers[i];
+        if (!strcmp(candidate->name, name))
+            return candidate;
+    }
+
+    // not found -> add it
+    struct sc_buffer fresh = {0};
+    fresh.name = talloc_strdup(NULL, name);
+    MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, fresh);
+    return &sc->buffers[sc->num_buffers - 1];
+}
+
+const char *mp_sampler_type(GLenum texture_target)
+{
+ switch (texture_target) {
+ case GL_TEXTURE_1D: return "sampler1D";
+ case GL_TEXTURE_2D: return "sampler2D";
+ case GL_TEXTURE_RECTANGLE: return "sample