From aac04c0d6496d8847499a94376e85f1711bf31d6 Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 4 Aug 2017 19:09:46 +0200 Subject: vo_opengl: split utils.c/h Actually GL-specific parts go into gl_utils.c/h, the shader cache (gl_sc*) into shader_cache.c/h. No semantic changes of any kind, except that the VAO helper is made public again as part of gl_utils.c (all while the goal for gl_utils.c itself is to be included by GL-specific code). --- video/out/opengl/gl_utils.c | 502 +++++++++++++ video/out/opengl/gl_utils.h | 94 +++ video/out/opengl/hwdec_vdpau.c | 2 +- video/out/opengl/osd.c | 1 - video/out/opengl/osd.h | 2 + video/out/opengl/ra_gl.h | 2 +- video/out/opengl/shader_cache.c | 952 +++++++++++++++++++++++++ video/out/opengl/shader_cache.h | 51 ++ video/out/opengl/user_shaders.h | 1 + video/out/opengl/utils.c | 1489 +-------------------------------------- video/out/opengl/utils.h | 149 +--- video/out/opengl/video.h | 2 + wscript_build.py | 2 + 13 files changed, 1647 insertions(+), 1602 deletions(-) create mode 100644 video/out/opengl/gl_utils.c create mode 100644 video/out/opengl/gl_utils.h create mode 100644 video/out/opengl/shader_cache.c create mode 100644 video/out/opengl/shader_cache.h diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c new file mode 100644 index 0000000000..c870756b1e --- /dev/null +++ b/video/out/opengl/gl_utils.c @@ -0,0 +1,502 @@ +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "formats.h" +#include "ra_gl.h" +#include "gl_utils.h" + +// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) +static const char *gl_error_to_string(GLenum error) +{ + switch (error) { + case GL_INVALID_ENUM: return "INVALID_ENUM"; + case GL_INVALID_VALUE: return "INVALID_VALUE"; + case GL_INVALID_OPERATION: return "INVALID_OPERATION"; + case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; + case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + default: return "unknown"; + } +} + +void gl_check_error(GL *gl, struct mp_log *log, const char *info) +{ + for (;;) { + GLenum error = gl->GetError(); + if (error == GL_NO_ERROR) + break; + mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, + gl_error_to_string(error)); + } +} + +static int get_alignment(int stride) +{ + if (stride % 8 == 0) + return 8; + if (stride % 4 == 0) + return 4; + if (stride % 2 == 0) + return 2; + return 1; +} + +// upload a texture, handling things like stride and slices +// target: texture target, usually GL_TEXTURE_2D +// format, type: texture parameters +// dataptr, stride: image data +// x, y, width, height: part of the image to upload +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h) +{ + int bpp = gl_bytes_per_pixel(format, type); + const uint8_t *data = dataptr; + int y_max = y + h; + if (w <= 0 || h <= 0 || !bpp) + return; + if (stride < 0) { + data += (h - 1) * stride; + stride = -stride; + } + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { + // this is not always correct, but should work for MPlayer + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); + } else { + if (stride != bpp * w) + slice = 1; // very inefficient, but at least it works + } + for (; y + slice <= y_max; y += slice) { + gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); + data += stride * slice; + } + if (y < y_max) + gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); +} + +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) +{ + if (gl->es) + return NULL; // ES can't read from front buffer + mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); + if (!image) + return NULL; + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT; + gl->PixelStorei(GL_PACK_ALIGNMENT, 1); + gl->ReadBuffer(obj); + //flip image while reading (and also avoid stride-related trouble) + for (int y = 0; y < h; y++) { + gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, + image->planes[0] + y * image->stride[0]); + } + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + return image; +} + +void mp_log_source(struct mp_log *log, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} + +static void gl_vao_enable_attribs(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + for (int n = 0; vao->entries[n].name; n++) { + const struct gl_vao_entry *e = &vao->entries[n]; + + gl->EnableVertexAttribArray(n); + gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized, + vao->stride, (void *)(intptr_t)e->offset); + } +} + +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct gl_vao_entry *entries) +{ + assert(!vao->vao); + assert(!vao->buffer); + + *vao = (struct gl_vao){ + .gl = gl, + .stride = stride, + .entries = entries, + }; + + gl->GenBuffers(1, &vao->buffer); + + if (gl->BindVertexArray) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + + gl->GenVertexArrays(1, &vao->vao); + gl->BindVertexArray(vao->vao); + gl_vao_enable_attribs(vao); + gl->BindVertexArray(0); + + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +void gl_vao_uninit(struct gl_vao *vao) +{ + GL *gl = vao->gl; + if (!gl) + return; + + if (gl->DeleteVertexArrays) + gl->DeleteVertexArrays(1, &vao->vao); + gl->DeleteBuffers(1, &vao->buffer); + + *vao = (struct gl_vao){0}; +} + +static void gl_vao_bind(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(vao->vao); + } else { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl_vao_enable_attribs(vao); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +static void gl_vao_unbind(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(0); + } else { + for (int n = 0; vao->entries[n].name; n++) + gl->DisableVertexAttribArray(n); + } +} + +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// If ptr is NULL, then skip the upload, and use the data uploaded with the +// previous call. +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) +{ + GL *gl = vao->gl; + + if (ptr) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } + + gl_vao_bind(vao); + + gl->DrawArrays(prim, 0, num); + + gl_vao_unbind(vao); +} + +static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) +{ + // keep in mind that the debug callback can be asynchronous + struct mp_log *log = (void *)userParam; + int level = MSGL_ERR; + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; + case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; + case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; + } + mp_msg(log, level, "GL: %s\n", message); +} + +void gl_set_debug_logger(GL *gl, struct mp_log *log) +{ + if (gl->DebugMessageCallback) + gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); +} + +// Maximum number of simultaneous query objects to keep around. Reducing this +// number might cause rendering to block until the result of a previous query is +// available +#define QUERY_OBJECT_NUM 8 + +struct gl_timer { + GL *gl; + GLuint query[QUERY_OBJECT_NUM]; + int query_idx; + + // these numbers are all in nanoseconds + uint64_t samples[PERF_SAMPLE_COUNT]; + int sample_idx; + int sample_count; + + uint64_t avg_sum; + uint64_t peak; +}; + +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer) +{ + assert(timer); + struct mp_pass_perf res = { + .count = timer->sample_count, + .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT, + .peak = timer->peak, + .samples = timer->samples, + }; + + res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT]; + + if (timer->sample_count > 0) { + res.avg = timer->avg_sum / timer->sample_count; + } + + return res; +} + +struct gl_timer *gl_timer_create(GL *gl) +{ + struct gl_timer *timer = talloc_ptrtype(NULL, timer); + *timer = (struct gl_timer){ .gl = gl }; + + if (gl->GenQueries) + gl->GenQueries(QUERY_OBJECT_NUM, timer->query); + + return timer; +} + +void gl_timer_free(struct gl_timer *timer) +{ + if (!timer) + return; + + GL *gl = timer->gl; + if (gl && gl->DeleteQueries) { + // this is a no-op on already uninitialized queries + gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query); + } + + talloc_free(timer); +} + +static void gl_timer_record(struct gl_timer *timer, GLuint64 new) +{ + // Input res into the buffer and grab the previous value + uint64_t old = timer->samples[timer->sample_idx]; + timer->samples[timer->sample_idx++] = new; + timer->sample_idx %= PERF_SAMPLE_COUNT; + + // Update average and sum + timer->avg_sum = timer->avg_sum + new - old; + timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT); + + // Update peak if necessary + if (new >= timer->peak) { + timer->peak = new; + } else if (timer->peak == old) { + // It's possible that the last peak was the value we just removed, + // if so we need to scan for the new peak + uint64_t peak = new; + for (int i = 0; i < PERF_SAMPLE_COUNT; i++) + peak = MPMAX(peak, timer->samples[i]); + timer->peak = peak; + } +} + +// If no free query is available, this can block. Shouldn't ever happen in +// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM) +// IMPORTANT: only one gl_timer object may ever be active at a single time. +// The caling code *MUST* ensure this +void gl_timer_start(struct gl_timer *timer) +{ + assert(timer); + GL *gl = timer->gl; + if (!gl->BeginQuery) + return; + + // Get the next query object + GLuint id = timer->query[timer->query_idx++]; + timer->query_idx %= QUERY_OBJECT_NUM; + + // If this query object already holds a result, we need to get and + // record it first + if (gl->IsQuery(id)) { + GLuint64 elapsed; + gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed); + gl_timer_record(timer, elapsed); + } + + gl->BeginQuery(GL_TIME_ELAPSED, id); +} + +void gl_timer_stop(GL *gl) +{ + if (gl->EndQuery) + gl->EndQuery(GL_TIME_ELAPSED); +} + +// Upload a texture, going through a PBO. PBO supposedly can facilitate +// asynchronous copy from CPU to GPU, so this is an optimization. Note that +// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can +// ruin performance. +// This call is like gl_upload_tex(), plus PBO management/use. +// target, format, type, dataptr, stride, x, y, w, h: texture upload params +// (see gl_upload_tex()) +// tex_w, tex_h: maximum size of the used texture +// use_pbo: for convenience, if false redirects the call to gl_upload_tex +void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, + GLenum target, GLenum format, GLenum type, + int tex_w, int tex_h, const void *dataptr, int stride, + int x, int y, int w, int h) +{ + assert(x >= 0 && y >= 0 && w >= 0 && h >= 0); + assert(x + w <= tex_w && y + h <= tex_h); + + if (!use_pbo) { + gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h); + return; + } + + // We align the buffer size to 4096 to avoid possible subregion + // dependencies. This is not a strict requirement (the spec requires no + // alignment), but a good precaution for performance reasons + size_t needed_size = stride * h; + size_t buffer_size = MP_ALIGN_UP(needed_size, 4096); + + if (buffer_size != pbo->buffer_size) + gl_pbo_upload_uninit(pbo); + + if (!pbo->buffer) { + pbo->gl = gl; + pbo->buffer_size = buffer_size; + gl->GenBuffers(1, &pbo->buffer); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + // Magic time: Because we memcpy once from RAM to the buffer, and then + // the GPU needs to read from this anyway, we actually *don't* want + // this buffer to be allocated in RAM. If we allocate it in VRAM + // instead, we can reduce this to a single copy: from RAM into VRAM. + // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best + // allocated on host memory instead of device memory, so we lie about + // the usage to fool the driver into giving us a buffer in VRAM instead + // of RAM, which can be significantly faster for our use case. + // Seriously, fuck OpenGL. + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size, + NULL, GL_STREAM_COPY); + } + + uintptr_t offset = buffer_size * pbo->index; + pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS; + + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr); + gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +} + +void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo) +{ + if (pbo->gl) + pbo->gl->DeleteBuffers(1, &pbo->buffer); + + *pbo = (struct gl_pbo_upload){0}; +} + +// The intention is to return the actual depth of any fixed point 16 bit +// textures. (Actually tests only 1 format - hope that is good enough.) +int gl_determine_16bit_tex_depth(GL *gl) +{ + const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1); + if (!gl->GetTexLevelParameteriv || !fmt) { + // ANGLE supports ES 3.0 and the extension, but lacks the function above. + if (gl->mpgl_caps & MPGL_CAP_EXT16) + return 16; + return -1; + } + + GLuint tex; + gl->GenTextures(1, &tex); + gl->BindTexture(GL_TEXTURE_2D, tex); + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, 64, 64, 0, + fmt->format, fmt->type, NULL); + GLenum pname = 0; + switch (fmt->format) { + case GL_RED: pname = GL_TEXTURE_RED_SIZE; break; + case GL_LUMINANCE: pname = GL_TEXTURE_LUMINANCE_SIZE; break; + } + GLint param = -1; + if (pname) + gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pname, ¶m); + gl->DeleteTextures(1, &tex); + return param; +} + +int gl_get_fb_depth(GL *gl, int fbo) +{ + if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) + return -1; + + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + + GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; + if (fbo) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_g = -1; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return depth_g > 0 ? depth_g : -1; +} diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h new file mode 100644 index 0000000000..5ae8d1590b --- /dev/null +++ b/video/out/opengl/gl_utils.h @@ -0,0 +1,94 @@ +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#ifndef MP_GL_UTILS_ +#define MP_GL_UTILS_ + +#include + +#include "common.h" +#include "ra.h" + +struct mp_log; + +void gl_check_error(GL *gl, struct mp_log *log, const char *info); + +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h); + +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); + +const char* mp_sampler_type(GLenum texture_target); + +// print a multi line string with line numbers (e.g. for shader sources) +// log, lev: module and log level, as in mp_msg() +void mp_log_source(struct mp_log *log, int lev, const char *src); + +struct gl_vao_entry { + // used for shader / glBindAttribLocation + const char *name; + // glVertexAttribPointer() arguments + int num_elems; // size (number of elements) + GLenum type; + bool normalized; + int offset; +}; + +struct gl_vao { + GL *gl; + GLuint vao; // the VAO object, or 0 if unsupported by driver + GLuint buffer; // GL_ARRAY_BUFFER used for the data + int stride; // size of each element (interleaved elements are assumed) + const struct gl_vao_entry *entries; +}; + +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct gl_vao_entry *entries); +void gl_vao_uninit(struct gl_vao *vao); +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); + +void gl_set_debug_logger(GL *gl, struct mp_log *log); + +struct gl_timer; + +struct gl_timer *gl_timer_create(GL *gl); +void gl_timer_free(struct gl_timer *timer); +void gl_timer_start(struct gl_timer *timer); +void gl_timer_stop(GL *gl); +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer); + +#define NUM_PBO_BUFFERS 3 + +struct gl_pbo_upload { + GL *gl; + int index; + GLuint buffer; + size_t buffer_size; +}; + +void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, + GLenum target, GLenum format, GLenum type, + int tex_w, int tex_h, const void *dataptr, int stride, + int x, int y, int w, int h); +void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo); + +int gl_determine_16bit_tex_depth(GL *gl); +int gl_get_fb_depth(GL *gl, int fbo); + +#endif diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c index 712997ed7a..9ddec18e06 100644 --- a/video/out/opengl/hwdec_vdpau.c +++ b/video/out/opengl/hwdec_vdpau.c @@ -21,7 +21,7 @@ #include #include "hwdec.h" -#include "utils.h" +#include "gl_utils.h" #include "video/vdpau.h" #include "video/vdpau_mixer.h" diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index 18eb36694f..aa0791139d 100644 --- a/video/out/opengl/osd.c +++ b/video/out/opengl/osd.c @@ -22,7 +22,6 @@ #include #include "formats.h" -#include "utils.h" #include "osd.h" #define GLSL(x) gl_sc_add(sc, #x "\n"); diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h index a09c891c0b..36926f95f0 100644 --- a/video/out/opengl/osd.h +++ b/video/out/opengl/osd.h @@ -5,6 +5,8 @@ #include #include "utils.h" +#include "gl_utils.h" +#include "shader_cache.h" #include "sub/osd.h" struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd); diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h index 23e3199aeb..016ce13419 100644 --- a/video/out/opengl/ra_gl.h +++ b/video/out/opengl/ra_gl.h @@ -2,7 +2,7 @@ #include "common.h" #include "ra.h" -#include "utils.h" +#include "gl_utils.h" // For ra.priv struct ra_gl { diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c new file mode 100644 index 0000000000..7f8b37be64 --- /dev/null +++ b/video/out/opengl/shader_cache.c @@ -0,0 +1,952 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "shader_cache.h" +#include "formats.h" +#include "ra_gl.h" +#include "gl_utils.h" + +// Force cache flush if more than this number of shaders is created. +#define SC_MAX_ENTRIES 48 + +enum uniform_type { + UT_invalid, + UT_i, + UT_f, + UT_m, +}; + +union uniform_val { + GLfloat f[9]; + GLint i[4]; +}; + +struct sc_uniform { + char *name; + enum uniform_type type; + const char *glsl_type; + int size; + GLint loc; + union uniform_val v; + // Set for sampler uniforms. + GLenum tex_target; + GLuint tex_handle; + // Set for image uniforms + GLuint img_handle; + GLenum img_access; + GLenum img_iformat; +}; + +struct sc_buffer { + char *name; + char *format; + GLuint binding; + GLuint ssbo; +}; + +struct sc_cached_uniform { + GLint loc; + union uniform_val v; +}; + +struct sc_entry { + GLuint gl_shader; + struct sc_cached_uniform *uniforms; + int num_uniforms; + bstr frag; + bstr vert; + bstr comp; + struct gl_timer *timer; + struct gl_vao vao; +}; + +struct gl_shader_cache { + GL *gl; + struct mp_log *log; + + // permanent + char **exts; + int num_exts; + + // this is modified during use (gl_sc_add() etc.) and reset for each shader + bstr prelude_text; + bstr header_text; + bstr text; + int next_texture_unit; + int next_image_unit; + int next_buffer_binding; + struct gl_vao *vao; // deprecated + + struct sc_entry *entries; + int num_entries; + + struct sc_entry *current_shader; // set by gl_sc_generate() + + struct sc_uniform *uniforms; + int num_uniforms; + struct sc_buffer *buffers; + int num_buffers; + + const struct gl_vao_entry *vertex_entries; + size_t vertex_size; + + // For checking that the user is calling gl_sc_reset() properly. + bool needs_reset; + + bool error_state; // true if an error occurred + + // temporary buffers (avoids frequent reallocations) + bstr tmp[5]; + + // For the disk-cache. + char *cache_dir; + struct mpv_global *global; // can be NULL +}; + +struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log) +{ + struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); + *sc = (struct gl_shader_cache){ + .gl = gl, + .log = log, + }; + gl_sc_reset(sc); + return sc; +} + +// Reset the previous pass. This must be called after +// Unbind all GL state managed by sc - the current program and texture units. +void gl_sc_reset(struct gl_shader_cache *sc) +{ + GL *gl = sc->gl; + + if (sc->needs_reset) { + gl_timer_stop(gl); + gl->UseProgram(0); + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type == UT_i && u->tex_target) { + gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); + gl->BindTexture(u->tex_target, 0); + } + if (u->type == UT_i && u->img_access) { + gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0, + u->img_access, u->img_iformat); + } + } + gl->ActiveTexture(GL_TEXTURE0); + + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0); + } + } + + sc->prelude_text.len = 0; + sc->header_text.len = 0; + sc->text.len = 0; + for (int n = 0; n < sc->num_uniforms; n++) + talloc_free(sc->uniforms[n].name); + sc->num_uniforms = 0; + for (int n = 0; n < sc->num_buffers; n++) { + talloc_free(sc->buffers[n].name); + talloc_free(sc->buffers[n].format); + } + sc->num_buffers = 0; + sc->next_texture_unit = 1; // not 0, as 0 is "free for use" + sc->next_image_unit = 1; + sc->next_buffer_binding = 1; + sc->vertex_entries = NULL; + sc->vertex_size = 0; + sc->current_shader = NULL; + sc->needs_reset = false; +} + +static void sc_flush_cache(struct gl_shader_cache *sc) +{ + MP_VERBOSE(sc, "flushing shader cache\n"); + + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *e = &sc->entries[n]; + sc->gl->DeleteProgram(e->gl_shader); + talloc_free(e->vert.start); + talloc_free(e->frag.start); + talloc_free(e->comp.start); + talloc_free(e->uniforms); + gl_timer_free(e->timer); + gl_vao_uninit(&e->vao); + } + sc->num_entries = 0; +} + +void gl_sc_destroy(struct gl_shader_cache *sc) +{ + if (!sc) + return; + gl_sc_reset(sc); + sc_flush_cache(sc); + talloc_free(sc); +} + +bool gl_sc_error_state(struct gl_shader_cache *sc) +{ + return sc->error_state; +} + +void gl_sc_reset_error(struct gl_shader_cache *sc) +{ + sc->error_state = false; +} + +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) +{ + for (int n = 0; n < sc->num_exts; n++) { + if (strcmp(sc->exts[n], name) == 0) + return; + } + MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); +} + +#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) + +void gl_sc_add(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->text, text); +} + +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->header_text, text); +} + +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) +{ + bstr_xappend(sc, &sc->header_text, text); +} + +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); + va_end(ap); +} + +static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, + const char *name) +{ + for (int n = 0; n < sc->num_uniforms; n++) { + if (strcmp(sc->uniforms[n].name, name) == 0) + return &sc->uniforms[n]; + } + // not found -> add it + struct sc_uniform new = { + .loc = -1, + .name = talloc_strdup(NULL, name), + }; + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; +} + +static struct sc_buffer *find_buffer(struct gl_shader_cache *sc, + const char *name) +{ + for (int n = 0; n < sc->num_buffers; n++) { + if (strcmp(sc->buffers[n].name, name) == 0) + return &sc->buffers[n]; + } + // not found -> add it + struct sc_buffer new = { + .name = talloc_strdup(NULL, name), + }; + MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, new); + return &sc->buffers[sc->num_buffers - 1]; +} + +const char *mp_sampler_type(GLenum texture_target) +{ + switch (texture_target) { + case GL_TEXTURE_1D: return "sampler1D"; + case GL_TEXTURE_2D: return "sampler2D"; + case GL_TEXTURE_RECTANGLE: return "sampler2DRect"; + case GL_TEXTURE_EXTERNAL_OES: return "samplerExternalOES"; + case GL_TEXTURE_3D: return "sampler3D"; + default: abort(); + } +} + +void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target, + GLuint texture) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_i; + u->size = 1; + u->glsl_type = mp_sampler_type(target); + u->v.i[0] = sc->next_texture_unit++; + u->tex_target = target; + u->tex_handle = texture; +} + +void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_i; + u->size = 1; + u->glsl_type = sc->gl->es ? "highp usampler2D" : "usampler2D"; + u->v.i[0] = sc->next_texture_unit++; + u->tex_target = GL_TEXTURE_2D; + u->tex_handle = texture; +} + +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex) +{ + struct ra_tex_gl *tex_gl = tex->priv; + if (tex->params.format->ctype == RA_CTYPE_UINT) { + gl_sc_uniform_tex_ui(sc, name, tex_gl->texture); + } else { + gl_sc_uniform_tex(sc, name, tex_gl->target, tex_gl->texture); + } +} + +static const char *mp_image2D_type(GLenum access) +{ + switch (access) { + case GL_WRITE_ONLY: return "writeonly image2D"; + case GL_READ_ONLY: return "readonly image2D"; + case GL_READ_WRITE: return "image2D"; + default: abort(); + } +} + +void gl_sc_uniform_image2D(struct gl_shader_cache *sc, const char *name, + GLuint texture, GLuint iformat, GLenum access) +{ + gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); + + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_i; + u->size = 1; + u->glsl_type = mp_image2D_type(access); + u->v.i[0] = sc->next_image_unit++; + u->img_handle = texture; + u->img_access = access; + u->img_iformat = iformat; +} + +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, + char *format, ...) +{ + gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); + + struct sc_buffer *b = find_buffer(sc, name); + b->binding = sc->next_buffer_binding++; + b->ssbo = ssbo; + b->format = format; + + va_list ap; + va_start(ap, format); + b->format = ta_vasprintf(sc, format, ap); + va_end(ap); +} + +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_f; + u->size = 1; + u->glsl_type = "float"; + u->v.f[0] = f; +} + +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint i) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_i; + u->size = 1; + u->glsl_type = "int"; + u->v.i[0] = i; +} + +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_f; + u->size = 2; + u->glsl_type = "vec2"; + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; +} + +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_f; + u->size = 3; + u->glsl_type = "vec3"; + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; + u->v.f[2] = f[2]; +} + +static void transpose2x2(float r[2 * 2]) +{ + MPSWAP(float, r[0+2*1], r[1+2*0]); +} + +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, GLfloat *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_m; + u->size = 2; + u->glsl_type = "mat2"; + for (int n = 0; n < 4; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose2x2(&u->v.f[0]); +} + +static void transpose3x3(float r[3 * 3]) +{ + MPSWAP(float, r[0+3*1], r[1+3*0]); + MPSWAP(float, r[0+3*2], r[2+3*0]); + MPSWAP(float, r[1+3*2], r[2+3*1]); +} + +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, GLfloat *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_m; + u->size = 3; + u->glsl_type = "mat3"; + for (int n = 0; n < 9; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose3x3(&u->v.f[0]); +} + +// Tell the shader generator (and later gl_sc_draw_data()) about the vertex +// data layout and attribute names. The entries array is terminated with a {0} +// entry. The array memory must remain valid indefinitely (for now). +void gl_sc_set_vertex_format(struct gl_shader_cache *sc, + const struct gl_vao_entry *entries, + size_t vertex_size) +{ + sc->vertex_entries = entries; + sc->vertex_size = vertex_size; +} + +static const char *vao_glsl_type(const struct gl_vao_entry *e) +{ + // pretty dumb... too dumb, but works for us + switch (e->num_elems) { + case 1: return "float"; + case 2: return "vec2"; + case 3: return "vec3"; + case 4: return "vec4"; + default: abort(); + } +} + +// Assumes program is current (gl->UseProgram(program)). +static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int n) +{ + struct sc_cached_uniform *un = &e->uniforms[n]; + GLint loc = un->loc; + if (loc < 0) + return; + switch (u->type) { + case UT_i: + assert(u->size == 1); + if (memcmp(un->v.i, u->v.i, sizeof(u->v.i)) != 0) { + memcpy(un->v.i, u->v.i, sizeof(u->v.i)); + gl->Uniform1i(loc, u->v.i[0]); + } + // For samplers: set the actual texture. + if (u->tex_target) { + gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); + gl->BindTexture(u->tex_target, u->tex_handle); + } + if (u->img_handle) { + gl->BindImageTexture(u->v.i[0], u->img_handle, 0, GL_FALSE, 0, + u->img_access, u->img_iformat); + } + break; + case UT_f: + if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { + memcpy(un->v.f, u->v.f, sizeof(u->v.f)); + switch (u->size) { + case 1: gl->Uniform1f(loc, u->v.f[0]); break; + case 2: gl->Uniform2f(loc, u->v.f[0], u->v.f[1]); break; + case 3: gl->Uniform3f(loc, u->v.f[0], u->v.f[1], u->v.f[2]); break; + case 4: gl->Uniform4f(loc, u->v.f[0], u->v.f[1], u->v.f[2], + u->v.f[3]); break; + default: abort(); + } + } + break; + case UT_m: + if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { + memcpy(un->v.f, u->v.f, sizeof(u->v.f)); + switch (u->size) { + case 2: gl->UniformMatrix2fv(loc, 1, GL_FALSE, &u->v.f[0]); break; + case 3: gl->UniformMatrix3fv(loc, 1, GL_FALSE, &u->v.f[0]); break; + default: abort(); + } + } + break; + default: + abort(); + } +} + +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global, + const char *dir) +{ + talloc_free(sc->cache_dir); + sc->cache_dir = talloc_strdup(sc, dir); + sc->global = global; +} + +static const char *shader_typestr(GLenum type) +{ + switch (type) { + case GL_VERTEX_SHADER: return "vertex"; + case GL_FRAGMENT_SHADER: return "fragment"; + case GL_COMPUTE_SHADER: return "compute"; + default: abort(); + } +} + +static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program, + GLenum type, const char *source) +{ + GL *gl = sc->gl; + + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &source, NULL); + gl->CompileShader(shader); + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + const char *typestr = shader_typestr(type); + if (mp_msg_test(sc->log, pri)) { + MP_MSG(sc, pri, "%s shader source:\n", typestr); + mp_log_source(sc->log, pri, source); + } + if (log_length > 1) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + MP_MSG(sc, pri, "%s shader compile log (status=%d):\n%s\n", + typestr, status, logstr); + talloc_free(logstr); + } + if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(sc->log, MSGL_DEBUG)) { + GLint len = 0; + gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); + if (len > 0) { + GLchar *sstr = talloc_zero_size(NULL, len + 1); + gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); + MP_DBG(sc, "Translated shader:\n"); + mp_log_source(sc->log, MSGL_DEBUG, sstr); + } + } + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + + if (!status) + sc->error_state = true; +} + +static void link_shader(struct gl_shader_cache *sc, GLuint program) +{ + GL *gl = sc->gl; + gl->LinkProgram(program); + GLint status = 0; + gl->GetProgramiv(program, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + if (mp_msg_test(sc->log, pri)) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetProgramInfoLog(program, log_length, NULL, logstr); + MP_MSG(sc, pri, "shader link log (status=%d): %s\n", status, logstr); + talloc_free(logstr); + } + + if (!status) + sc->error_state = true; +} + +// either 'compute' or both 'vertex' and 'frag' are needed +static GLuint compile_program(struct gl_shader_cache *sc, struct bstr *vertex, + struct bstr *frag, struct bstr *compute) +{ + GL *gl = sc->gl; + GLuint prog = gl->CreateProgram(); + if (compute) + compile_attach_shader(sc, prog, GL_COMPUTE_SHADER, compute->start); + if (vertex && frag) { + compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex->start); + compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag->start); + for (int n = 0; sc->vertex_entries[n].name; n++) { + char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name); + gl->BindAttribLocation(prog, n, vname); + } + } + link_shader(sc, prog); + return prog; +} + +static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex, + struct bstr *frag, struct bstr *compute) +{ + GL *gl = sc->gl; + + MP_VERBOSE(sc, "new shader program:\n"); + if (sc->header_text.len) { + MP_VERBOSE(sc, "header:\n"); + mp_log_source(sc->log, MSGL_V, sc->header_text.start); + MP_VERBOSE(sc, "body:\n"); + } + if (sc->text.len) + mp_log_source(sc->log, MSGL_V, sc->text.start); + + if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary) + return compile_program(sc, vertex, frag, compute); + + // Try to load it from a disk cache, or compiling + saving it. + + GLuint prog = 0; + void *tmp = talloc_new(NULL); + char *dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); + + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + + if (vertex) + av_sha_update(sha, vertex->start, vertex->len + 1); + if (frag) + av_sha_update(sha, frag->start, frag->len + 1); + if (compute) + av_sha_update(sha, compute->start, compute->len + 1); + + // In theory, the array could change order, breaking old binaries. + for (int n = 0; sc->vertex_entries[n].name; n++) { + av_sha_update(sha, sc->vertex_entries[n].name, + strlen(sc->vertex_entries[n].name) + 1); + } + + uint8_t hash[256 / 8]; + av_sha_final(sha, hash); + av_free(sha); + + char hashstr[256 / 8 * 2 + 1]; + for (int n = 0; n < 256 / 8; n++) + snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); + + const char *header = "mpv shader cache v1\n"; + size_t header_size = strlen(header) + 4; + + char *filename = mp_path_join(tmp, dir, hashstr); + if (stat(filename, &(struct stat){0}) == 0) { + MP_VERBOSE(sc, "Trying to load shader from disk...\n"); + struct bstr cachedata = stream_read_file(filename, tmp, sc->global, + 1000000000); // 1 GB + if (cachedata.len > header_size) { + GLenum format = AV_RL32(cachedata.start + header_size - 4); + prog = gl->CreateProgram(); + gl_check_error(gl, sc->log, "before loading program"); + gl->ProgramBinary(prog, format, cachedata.start + header_size, + cachedata.len - header_size); + gl->GetError(); // discard potential useless error + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (!status) { + gl->DeleteProgram(prog); + prog = 0; + } + } + MP_VERBOSE(sc, "Loading cached shader %s.\n", prog ? "ok" : "failed"); + } + + if (!prog) { + prog = compile_program(sc, vertex, frag, compute); + + GLint size = 0; + gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size); + uint8_t *buffer = talloc_size(tmp, size + header_size); + GLsizei actual_size = 0; + GLenum binary_format = 0; + gl->GetProgramBinary(prog, size, &actual_size, &binary_format, + buffer + header_size); + memcpy(buffer, header, header_size - 4); + AV_WL32(buffer + header_size - 4, binary_format); + + if (actual_size) { + mp_mkdirp(dir); + + MP_VERBOSE(sc, "Writing shader cache file: %s\n", filename); + FILE *out = fopen(filename, "wb"); + if (out) { + fwrite(buffer, header_size + actual_size, 1, out); + fclose(out); + } + } + } + + talloc_free(tmp); + return prog; +} + +#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__) +#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) + +// 1. Generate vertex and fragment shaders from the fragment shader text added +// with gl_sc_add(). The generated shader program is cached (based on the +// text), so actual compilation happens only the first time. +// 2. Update the uniforms and textures set with gl_sc_uniform_*. +// 3. Make the new shader program current (glUseProgram()). +// After that, you render, and then you call gc_sc_reset(), which does: +// 1. Unbind the program and all textures. +// 2. Reset the sc state and prepare for a new shader program. (All uniforms +// and fragment operations needed for the next program have to be re-added.) +// The return value is a mp_pass_perf containing performance metrics for the +// execution of the generated shader. (Note: execution is measured up until +// the corresponding gl_sc_reset call) +// 'type' can be either GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER +struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type) +{ + GL *gl = sc->gl; + + // gl_sc_reset() must be called after ending the previous render process, + // and before starting a new one. + assert(!sc->needs_reset); + + // gl_sc_set_vertex_format() must always be called + assert(sc->vertex_entries); + + for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) + sc->tmp[n].len = 0; + + // set up shader text (header + uniforms + body) + bstr *header = &sc->tmp[0]; + ADD(header, "#version %d%s\n", gl->glsl_version, gl->es >= 300 ? " es" : ""); + if (type == GL_COMPUTE_SHADER) { + // This extension cannot be enabled in fragment shader. Enable it as + // an exception for compute shader. + ADD(header, "#extension GL_ARB_compute_shader : enable\n"); + } + for (int n = 0; n < sc->num_exts; n++) + ADD(header, "#extension %s : enable\n", sc->exts[n]); + if (gl->es) { + ADD(header, "precision mediump float;\n"); + ADD(header, "precision mediump sampler2D;\n"); + if (gl->mpgl_caps & MPGL_CAP_3D_TEX) + ADD(header, "precision mediump sampler3D;\n"); + } + + if (gl->glsl_version >= 130) { + ADD(header, "#define texture1D texture\n"); + ADD(header, "#define texture3D texture\n"); + } else { + ADD(header, "#define texture texture2D\n"); + } + + // Additional helpers. + ADD(header, "#define LUT_POS(x, lut_size)" + " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); + + char *vert_in = gl->glsl_version >= 130 ? "in" : "attribute"; + char *vert_out = gl->glsl_version >= 130 ? "out" : "varying"; + char *frag_in = gl->glsl_version >= 130 ? "in" : "varying"; + + struct bstr *vert = NULL, *frag = NULL, *comp = NULL; + + if (type == GL_FRAGMENT_SHADER) { + // vertex shader: we don't use the vertex shader, so just setup a + // dummy, which passes through the vertex array attributes. + bstr *vert_head = &sc->tmp[1]; + ADD_BSTR(vert_head, *header); + bstr *vert_body = &sc->tmp[2]; + ADD(vert_body, "void main() {\n"); + bstr *frag_vaos = &sc->tmp[3]; + for (int n = 0; sc->vertex_entries[n].name; n++) { + const struct gl_vao_entry *e = &sc->vertex_entries[n]; + const char *glsl_type = vao_glsl_type(e); + if (strcmp(e->name, "position") == 0) { + // setting raster pos. requires setting gl_Position magic variable + assert(e->num_elems == 2 && e->type == GL_FLOAT); + ADD(vert_head, "%s vec2 vertex_position;\n", vert_in); + ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); + } else { + ADD(vert_head, "%s %s vertex_%s;\n", vert_in, glsl_type, e->name); + ADD(vert_head, "%s %s %s;\n", vert_out, glsl_type, e->name); + ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); + ADD(frag_vaos, "%s %s %s;\n", frag_in, glsl_type, e->name); + } + } + ADD(vert_body, "}\n"); + vert = vert_head; + ADD_BSTR(vert, *vert_body); + + // fragment shader; still requires adding used uniforms and VAO elements + frag = &sc->tmp[4]; + ADD_BSTR(frag, *header); + if (gl->glsl_version >= 130) + ADD(frag, "out vec4 out_color;\n"); + ADD_BSTR(frag, *frag_vaos); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name); + } + + ADD_BSTR(frag, sc->prelude_text); + ADD_BSTR(frag, sc->header_text); + + ADD(frag, "void main() {\n"); + // we require _all_ frag shaders to write to a "vec4 color" + ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); + ADD_BSTR(frag, sc->text); + if (gl->glsl_version >= 130) { + ADD(frag, "out_color = color;\n"); + } else { + ADD(frag, "gl_FragColor = color;\n"); + } + ADD(frag, "}\n"); + } + + if (type == GL_COMPUTE_SHADER) { + comp = &sc->tmp[4]; + ADD_BSTR(comp, *header); + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + ADD(comp, "uniform %s %s;\n", u->glsl_type, u->name); + } + + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + ADD(comp, "layout(std430, binding=%d) buffer %s { %s };\n", + b->binding, b->name, b->format); + } + + ADD_BSTR(comp, sc->prelude_text); + ADD_BSTR(comp, sc->header_text); + + ADD(comp, "void main() {\n"); + ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience + ADD_BSTR(comp, sc->text); + ADD(comp, "}\n"); + } + + struct sc_entry *entry = NULL; + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *cur = &sc->entries[n]; + if (frag && !bstr_equals(cur->frag, *frag)) + continue; + if (vert && !bstr_equals(cur->vert, *vert)) + continue; + if (comp && !bstr_equals(cur->comp, *comp)) + continue; + entry = cur; + break; + } + if (!entry) { + if (sc->num_entries == SC_MAX_ENTRIES) + sc_flush_cache(sc); + MP_TARRAY_GROW(sc, sc->entries, sc->num_entries); + entry = &sc->entries[sc->num_entries++]; + *entry = (struct sc_entry){ + .vert = vert ? bstrdup(NULL, *vert) : (struct bstr){0}, + .frag = frag ? bstrdup(NULL, *frag) : (struct bstr){0}, + .comp = comp ? bstrdup(NULL, *comp) : (struct bstr){0}, + .timer = gl_timer_create(gl), + }; + } + // build shader program and cache the locations of the uniform variables + if (!entry->gl_shader) { + entry->gl_shader = load_program(sc, vert, frag, comp); + entry->num_uniforms = 0; + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_cached_uniform un = { + .loc = gl->GetUniformLocation(entry->gl_shader, + sc->uniforms[n].name), + }; + MP_TARRAY_APPEND(sc, entry->uniforms, entry->num_uniforms, un); + } + assert(!entry->vao.vao); + gl_vao_init(&entry->vao, gl, sc->vertex_size, sc->vertex_entries); + } + + gl->UseProgram(entry->gl_shader); + + assert(sc->num_uniforms == entry->num_uniforms); + + for (int n = 0; n < sc->num_uniforms; n++) + update_uniform(gl, entry, &sc->uniforms[n], n); + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, b->ssbo); + } + + gl->ActiveTexture(GL_TEXTURE0); + + gl_timer_start(entry->timer); + sc->needs_reset = true; + sc->current_shader = entry; + + return gl_timer_measure(entry->timer); +} + +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// gl_sc_generate() must have been called before this. Some additional setup +// might be needed (like setting the viewport). +void gl_sc_draw_data(struct gl_shader_cache *sc, GLenum prim, void *ptr, + size_t num) +{ + assert(ptr); + assert(sc->current_shader); + + gl_vao_draw_data(&sc->current_shader->vao, prim, ptr, num); +} diff --git a/video/out/opengl/shader_cache.h b/video/out/opengl/shader_cache.h new file mode 100644 index 0000000000..8b3bd15735 --- /dev/null +++ b/video/out/opengl/shader_cache.h @@ -0,0 +1,51 @@ +#pragma once + +#include "common.h" +#include "gl_utils.h" +#include "misc/bstr.h" +#include "ra.h" + +struct gl_shader_cache; + +struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log); +void gl_sc_destroy(struct gl_shader_cache *sc); +bool gl_sc_error_state(struct gl_shader_cache *sc); +void gl_sc_reset_error(struct gl_shader_cache *sc); +void gl_sc_add(struct gl_shader_cache *sc, const char *text); +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target, + GLuint texture); +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex); +void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture); +void gl_sc_uniform_image2D(struct gl_shader_cache *sc, const char *name, + GLuint texture, GLuint iformat, GLenum access); +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, + char *format, ...) PRINTF_ATTRIBUTE(4, 5); +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f); +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f); +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]); +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]); +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, GLfloat *v); +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, GLfloat *v); +void gl_sc_set_vertex_format(struct gl_shader_cache *sc, + const struct gl_vao_entry *entries, + size_t vertex_size); +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); +struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type); +void gl_sc_draw_data(struct gl_shader_cache *sc, GLenum prim, void *ptr, + size_t num); +void gl_sc_reset(struct gl_shader_cache *sc); +struct mpv_global; +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global, + const char *dir); + diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h index 5f3f1d0d93..065606165c 100644 --- a/video/out/opengl/user_shaders.h +++ b/video/out/opengl/user_shaders.h @@ -20,6 +20,7 @@ #include "common.h" #include "utils.h" +#include "ra.h" #define SHADER_MAX_PASSES 32 #define SHADER_MAX_HOOKS 16 diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 239009d4ac..6848e02ddc 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -1,256 +1,35 @@ -/* - * This file is part of mpv. - * Parts based on MPlayer code by Reimar Döffinger. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "osdep/io.h" - -#include "common/common.h" -#include "options/path.h" -#include "stream/stream.h" -#include "formats.h" -#include "ra_gl.h" +#include "common/msg.h" #include "utils.h" -// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) -static const char *gl_error_to_string(GLenum error) -{ - switch (error) { - case GL_INVALID_ENUM: return "INVALID_ENUM"; - case GL_INVALID_VALUE: return "INVALID_VALUE"; - case GL_INVALID_OPERATION: return "INVALID_OPERATION"; - case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; - case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; - default: return "unknown"; - } -} - -void gl_check_error(GL *gl, struct mp_log *log, const char *info) -{ - for (;;) { - GLenum error = gl->GetError(); - if (error == GL_NO_ERROR) - break; - mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, - gl_error_to_string(error)); - } -} - -static int get_alignment(int stride) -{ - if (stride % 8 == 0) - return 8; - if (stride % 4 == 0) - return 4; - if (stride % 2 == 0) - return 2; - return 1; -} - -// upload a texture, handling things like stride and slices -// target: texture target, usually GL_TEXTURE_2D -// format, type: texture parameters -// dataptr, stride: image data -// x, y, width, height: part of the image to upload -void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h) -{ - int bpp = gl_bytes_per_pixel(format, type); - const uint8_t *data = dataptr; - int y_max = y + h; - if (w <= 0 || h <= 0 || !bpp) - return; - if (stride < 0) { - data += (h - 1) * stride; - stride = -stride; - } - gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); - int slice = h; - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { - // this is not always correct, but should work for MPlayer - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); - } else { - if (stride != bpp * w) - slice = 1; // very inefficient, but at least it works - } - for (; y + slice <= y_max; y += slice) { - gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); - data += stride * slice; - } - if (y < y_max) - gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); -} - -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) -{ - if (gl->es) - return NULL; // ES can't read from front buffer - mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); - if (!image) - return NULL; - gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); - GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT; - gl->PixelStorei(GL_PACK_ALIGNMENT, 1); - gl->ReadBuffer(obj); - //flip image while reading (and also avoid stride-related trouble) - for (int y = 0; y < h; y++) { - gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, - image->planes[0] + y * image->stride[0]); - } - gl->PixelStorei(GL_PACK_ALIGNMENT, 4); - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - return image; -} - -void mp_log_source(struct mp_log *log, int lev, const char *src) -{ - int line = 1; - if (!src) - return; - while (*src) { - const char *end = strchr(src, '\n'); - const char *next = end + 1; - if (!end) - next = end = src + strlen(src); - mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); - line++; - src = next; - } -} - - -struct gl_vao { - GL *gl; - GLuint vao; // the VAO object, or 0 if unsupported by driver - GLuint buffer; // GL_ARRAY_BUFFER used for the data - int stride; // size of each element (interleaved elements are assumed) - const struct gl_vao_entry *entries; -}; - -static void gl_vao_enable_attribs(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - for (int n = 0; vao->entries[n].name; n++) { - const struct gl_vao_entry *e = &vao->entries[n]; - - gl->EnableVertexAttribArray(n); - gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized, - vao->stride, (void *)(intptr_t)e->offset); - } -} - -static void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct gl_vao_entry *entries) -{ - assert(!vao->vao); - assert(!vao->buffer); - - *vao = (struct gl_vao){ - .gl = gl, - .stride = stride, - .entries = entries, - }; - - gl->GenBuffers(1, &vao->buffer); - - if (gl->BindVertexArray) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - - gl->GenVertexArrays(1, &vao->vao); - gl->BindVertexArray(vao->vao); - gl_vao_enable_attribs(vao); - gl->BindVertexArray(0); - - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } -} - -static void gl_vao_uninit(struct gl_vao *vao) -{ - GL *gl = vao->gl; - if (!gl) - return; - - if (gl->DeleteVertexArrays) - gl->DeleteVertexArrays(1, &vao->vao); - gl->DeleteBuffers(1, &vao->buffer); - - *vao = (struct gl_vao){0}; -} - -static void gl_vao_bind(struct gl_vao *vao) +// Standard parallel 2D projection, except y1 < y0 means that the coordinate +// system is flipped, not the projection. +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1) { - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(vao->vao); - } else { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl_vao_enable_attribs(vao); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); + if (y1 < y0) { + float tmp = y0; + y0 = tmp - y1; + y1 = tmp; } -} -static void gl_vao_unbind(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(0); - } else { - for (int n = 0; vao->entries[n].name; n++) - gl->DisableVertexAttribArray(n); - } + t->m[0][0] = 2.0f / (x1 - x0); + t->m[0][1] = 0.0f; + t->m[1][0] = 0.0f; + t->m[1][1] = 2.0f / (y1 - y0); + t->t[0] = -(x1 + x0) / (x1 - x0); + t->t[1] = -(y1 + y0) / (y1 - y0); } -// Draw the vertex data (as described by the gl_vao_entry entries) in ptr -// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. -// If ptr is NULL, then skip the upload, and use the data uploaded with the -// previous call. -static void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) +// Apply the effects of one transformation to another, transforming it in the +// process. In other words: post-composes t onto x +void gl_transform_trans(struct gl_transform t, struct gl_transform *x) { - GL *gl = vao->gl; - - if (ptr) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } - - gl_vao_bind(vao); - - gl->DrawArrays(prim, 0, num); - - gl_vao_unbind(vao); + struct gl_transform xt = *x; + x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; + x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; + x->m[0][1]