From 333cae74ef0fa62e0355e85d21f0f41ced3963e7 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 5 Aug 2017 14:20:14 +0200 Subject: vo_opengl: move shader handling to ra Now all GL-specifics of shader compilation are abstracted through ra. Of course we still have everything hardcoded to GLSL - that isn't going to change. Some things will probably change later - in particular, the way we pass uniforms and textures to the shader. Currently, there is a confusing mismatch between "primitive" uniforms like floats, and others like textures. Also, SSBOs are not abstracted yet. --- video/out/opengl/gl_utils.c | 26 +- video/out/opengl/gl_utils.h | 18 +- video/out/opengl/osd.c | 39 +- video/out/opengl/osd.h | 3 +- video/out/opengl/ra.c | 45 +++ video/out/opengl/ra.h | 164 ++++++++ video/out/opengl/ra_gl.c | 382 ++++++++++++++++++- video/out/opengl/ra_gl.h | 12 + video/out/opengl/shader_cache.c | 809 ++++++++++++++++------------------------ video/out/opengl/shader_cache.h | 52 +-- video/out/opengl/video.c | 83 ++--- 11 files changed, 1016 insertions(+), 617 deletions(-) (limited to 'video') diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c index c870756b1e..df6f0543ad 100644 --- a/video/out/opengl/gl_utils.c +++ b/video/out/opengl/gl_utils.c @@ -150,17 +150,32 @@ static void gl_vao_enable_attribs(struct gl_vao *vao) { GL *gl = vao->gl; - for (int n = 0; vao->entries[n].name; n++) { - const struct gl_vao_entry *e = &vao->entries[n]; + for (int n = 0; n < vao->num_entries; n++) { + const struct ra_renderpass_input *e = &vao->entries[n]; + GLenum type = 0; + bool normalized = false; + switch (e->type) { + case RA_VARTYPE_FLOAT: + type = GL_FLOAT; + break; + case RA_VARTYPE_BYTE_UNORM: + type = GL_UNSIGNED_BYTE; + normalized = true; + break; + default: + abort(); + } + assert(e->dim_m == 1); gl->EnableVertexAttribArray(n); - gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized, - vao->stride, (void *)(intptr_t)e->offset); + gl->VertexAttribPointer(n, 
e->dim_v, type, normalized, + vao->stride, (void *)(intptr_t)e->binding); } } void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct gl_vao_entry *entries) + const struct ra_renderpass_input *entries, + int num_entries) { assert(!vao->vao); assert(!vao->buffer); @@ -169,6 +184,7 @@ void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, .gl = gl, .stride = stride, .entries = entries, + .num_entries = num_entries, }; gl->GenBuffers(1, &vao->buffer); diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h index 5ae8d1590b..6192a6b312 100644 --- a/video/out/opengl/gl_utils.h +++ b/video/out/opengl/gl_utils.h @@ -34,32 +34,22 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); -const char* mp_sampler_type(GLenum texture_target); - // print a multi line string with line numbers (e.g. for shader sources) // log, lev: module and log level, as in mp_msg() void mp_log_source(struct mp_log *log, int lev, const char *src); -struct gl_vao_entry { - // used for shader / glBindAttribLocation - const char *name; - // glVertexAttribPointer() arguments - int num_elems; // size (number of elements) - GLenum type; - bool normalized; - int offset; -}; - struct gl_vao { GL *gl; GLuint vao; // the VAO object, or 0 if unsupported by driver GLuint buffer; // GL_ARRAY_BUFFER used for the data int stride; // size of each element (interleaved elements are assumed) - const struct gl_vao_entry *entries; + const struct ra_renderpass_input *entries; + int num_entries; }; void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct gl_vao_entry *entries); + const struct ra_renderpass_input *entries, + int num_entries); void gl_vao_uninit(struct gl_vao *vao); void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index 89820693ab..a656451c2e 100644 --- a/video/out/opengl/osd.c +++ 
b/video/out/opengl/osd.c @@ -22,17 +22,16 @@ #include #include "formats.h" -#include "ra_gl.h" #include "osd.h" #define GLSL(x) gl_sc_add(sc, #x "\n"); // glBlendFuncSeparate() arguments static const int blend_factors[SUBBITMAP_COUNT][4] = { - [SUBBITMAP_LIBASS] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, - GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, - [SUBBITMAP_RGBA] = {GL_ONE, GL_ONE_MINUS_SRC_ALPHA, - GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, }; struct vertex { @@ -41,10 +40,10 @@ struct vertex { uint8_t ass_color[4]; }; -static const struct gl_vao_entry vertex_vao[] = { - {"position", 2, GL_FLOAT, false, offsetof(struct vertex, position)}, - {"texcoord" , 2, GL_FLOAT, false, offsetof(struct vertex, texcoord)}, - {"ass_color", 4, GL_UNSIGNED_BYTE, true, offsetof(struct vertex, ass_color)}, +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, + {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, {0} }; @@ -53,7 +52,6 @@ struct mpgl_osd_part { int change_id; struct ra_tex *texture; int w, h; - struct gl_pbo_upload pbo; int num_subparts; int prev_num_subparts; struct sub_bitmap *subparts; @@ -65,7 +63,6 @@ struct mpgl_osd { struct mp_log *log; struct osd_state *osd; struct ra *ra; - GL *gl; struct mpgl_osd_part *parts[MAX_OSD_PARTS]; const struct ra_format *fmt_table[SUBBITMAP_COUNT]; bool formats[SUBBITMAP_COUNT]; @@ -79,14 +76,11 @@ struct mpgl_osd { struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, struct osd_state *osd) { - struct ra_gl *ra_gl = ra->priv; - struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); *ctx = (struct mpgl_osd) { .log = log, .osd = osd, 
.ra = ra, - .gl = ra_gl->gl, .scratch = talloc_zero_size(ctx, 1), }; @@ -289,9 +283,8 @@ static void get_3d_side_by_side(int stereo_mode, int div[2]) } void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, - struct gl_shader_cache *sc) + struct gl_shader_cache *sc, struct ra_tex *target) { - GL *gl = ctx->gl; struct mpgl_osd_part *part = ctx->parts[index]; int div[2]; @@ -313,20 +306,10 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, } } - if (!part->num_vertices) - return; - - gl->Enable(GL_BLEND); - const int *factors = &blend_factors[part->format][0]; - gl->BlendFuncSeparate(factors[0], factors[1], factors[2], factors[3]); - - ctx->gl->Viewport(0, 0, vp_w, abs(vp_h)); - - gl_sc_draw_data(sc, GL_TRIANGLES, part->vertices, part->num_vertices); + gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); - gl->BindTexture(GL_TEXTURE_2D, 0); - gl->Disable(GL_BLEND); + gl_sc_dispatch_draw(sc, target, part->vertices, part->num_vertices); } static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h index a0aa104a48..08c143ac0e 100644 --- a/video/out/opengl/osd.h +++ b/video/out/opengl/osd.h @@ -5,7 +5,6 @@ #include #include "utils.h" -#include "gl_utils.h" #include "shader_cache.h" #include "sub/osd.h" @@ -19,7 +18,7 @@ void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mod bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, struct gl_shader_cache *sc); void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, - struct gl_shader_cache *sc); + struct gl_shader_cache *sc, struct ra_tex *target); int64_t mpgl_get_change_counter(struct mpgl_osd *ctx); #endif diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c index 9096a50148..e67dd6ebdf 100644 --- a/video/out/opengl/ra.c +++ b/video/out/opengl/ra.c @@ -16,6 +16,51 @@ void ra_tex_free(struct ra *ra, struct ra_tex 
**tex) *tex = NULL; } +static size_t vartype_size(enum ra_vartype type) +{ + switch (type) { + case RA_VARTYPE_INT: return sizeof(int); + case RA_VARTYPE_FLOAT: return sizeof(float); + case RA_VARTYPE_BYTE_UNORM: return 1; + default: return 0; + } +} + +// Return the size of the data ra_renderpass_input_val.data is going to point +// to. This returns 0 for non-primitive types such as textures. +size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input) +{ + size_t el_size = vartype_size(input->type); + return el_size * input->dim_v * input->dim_m; +} + +static struct ra_renderpass_input *dup_inputs(void *ta_parent, + const struct ra_renderpass_input *inputs, int num_inputs) +{ + struct ra_renderpass_input *res = + talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0])); + for (int n = 0; n < num_inputs; n++) + res[n].name = talloc_strdup(res, res[n].name); + return res; +} + +// Return a newly allocated deep-copy of params. +struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); + *res = *params; + res->inputs = dup_inputs(res, res->inputs, res->num_inputs); + res->vertex_attribs = + dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); + res->cached_program = bstrdup(res, res->cached_program); + res->vertex_shader = talloc_strdup(res, res->vertex_shader); + res->frag_shader = talloc_strdup(res, res->frag_shader); + res->compute_shader = talloc_strdup(res, res->compute_shader); + return res; +}; + + // Return whether this is a tightly packed format with no external padding and // with the same bit size/depth in all components. 
static bool ra_format_is_regular(const struct ra_format *fmt) diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h index ab3027b78a..75ba0e5fe7 100644 --- a/video/out/opengl/ra.h +++ b/video/out/opengl/ra.h @@ -1,12 +1,16 @@ #pragma once #include "common/common.h" +#include "misc/bstr.h" // Handle for a rendering API backend. struct ra { struct ra_fns *fns; void *priv; + int glsl_version; // GLSL version (e.g. 300 => 3.0) + bool glsl_es; // use ES dialect + struct mp_log *log; // RA_CAP_* bit field. The RA backend must set supported features at init @@ -31,6 +35,9 @@ enum { RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader source textures) RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader source textures) RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit + RA_CAP_COMPUTE = 1 << 3, // supports compute shaders + RA_CAP_PBO = 1 << 4, // supports ra.use_pbo + RA_CAP_NESTED_ARRAY = 1 << 5, }; enum ra_ctype { @@ -81,6 +88,7 @@ struct ra_tex_params { // if true, repeat texture coordinates bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy // always set to false, except in OSX code + bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy // If non-NULL, the texture will be created with these contents, and is // considered immutable afterwards (no upload, mapping, or rendering to it). void *initial_data; @@ -108,6 +116,149 @@ struct ra_mapped_buffer { size_t size; // total size of the mapping, starting at data }; +// Type of a shader uniform variable, or a vertex attribute. In all cases, +// vectors and matrices are done by having more than 1 value.
+enum ra_vartype { + RA_VARTYPE_INVALID, + RA_VARTYPE_INT, // C: int, GLSL: int, ivec* + RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat* + RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types + // ra_tex.params.render_src must be true + RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types + // write-only (W) image for compute shaders + RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only) + RA_VARTYPE_SSBO, // a hack for GL +}; + +// Represents a uniform, texture input parameter, and similar things. +struct ra_renderpass_input { + const char *name; // name as used in the shader + enum ra_vartype type; + // The total number of values is given by dim_v * dim_m. + int dim_v; // vector dimension (1 for non-vector and non-matrix) + int dim_m; // additional matrix dimension (dim_v x dim_m) + // Vertex data: byte offset of the attribute into the vertex struct + // RA_VARTYPE_TEX: texture unit + // RA_VARTYPE_IMG_W: image unit + // RA_VARTYPE_SSBO: whatever? + // Other uniforms: unused + int binding; +}; + +size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input); + +enum ra_blend { + RA_BLEND_ZERO, + RA_BLEND_ONE, + RA_BLEND_SRC_ALPHA, + RA_BLEND_ONE_MINUS_SRC_ALPHA, +}; + +enum ra_renderpass_type { + RA_RENDERPASS_TYPE_INVALID, + RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader + RA_RENDERPASS_TYPE_COMPUTE, // compute shader +}; + +// Static part of a rendering pass. It conflates the following: +// - compiled shader and its list of uniforms +// - vertex attributes and its shader mappings +// - blending parameters +// (For Vulkan, this would be shader module + pipeline state.) +// Upon creation, the values of dynamic values such as uniform contents (whose +// initial values are not provided here) are required to be 0. +struct ra_renderpass_params { + enum ra_renderpass_type type; + + // Uniforms, including texture/sampler inputs. 
+ struct ra_renderpass_input *inputs; + int num_inputs; + + // Highly implementation-specific byte array storing a compiled version + // of the program. Can be used to speed up shader compilation. A backend + // can read this in renderpass_create, or set this on the newly created + // ra_renderpass params field. + bstr cached_program; + + // --- type==RA_RENDERPASS_TYPE_RASTER only + + // Describes the format of the vertex data. + struct ra_renderpass_input *vertex_attribs; + int num_vertex_attribs; + int vertex_stride; + + // Shader text, in GLSL. (Yes, you need a GLSL compiler.) + // These are complete shaders, including prelude and declarations. + const char *vertex_shader; + const char *frag_shader; + + // Target blending mode. If enable_blend is false, the blend_ fields can + // be ignored. + bool enable_blend; + enum ra_blend blend_src_rgb; + enum ra_blend blend_dst_rgb; + enum ra_blend blend_src_alpha; + enum ra_blend blend_dst_alpha; + + // --- type==RA_RENDERPASS_TYPE_COMPUTE only + + // Shader text, like vertex_shader/frag_shader. + const char *compute_shader; +}; + +struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params); + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all uniforms (this one makes it relatively stateful +// from an API perspective) +struct ra_renderpass { + // All fields are read-only after creation. + struct ra_renderpass_params params; + void *priv; +}; + +// An input value (see ra_renderpass_input). +struct ra_renderpass_input_val { + int index; // index into ra_renderpass_params.inputs[] + void *data; // pointer to data according to ra_renderpass_input + // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9]) +}; + +// Parameters for performing a rendering pass (basically the dynamic params).
+// These change potentially every time. +struct ra_renderpass_run_params { + struct ra_renderpass *pass; + + // Generally this lists parameters only which changed since the last + // invocation and need to be updated. The ra_renderpass instance is + // supposed to keep unchanged values from the previous run. + // For non-primitive types like textures, these entries are always added, + // even if they do not change. + struct ra_renderpass_input_val *values; + int num_values; + + // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only + + // target->params.render_dst must be true. + struct ra_tex *target; + struct mp_rect viewport; + struct mp_rect scissors; + + // (The primitive type is always a triangle list.) + void *vertex_data; + int vertex_count; // number of vertex elements, not bytes + + // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only + + // Number of work groups to be run in X/Y/Z dimensions. + int compute_groups[3]; +}; + enum { // Flags for the texture_upload flags parameter. RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region @@ -183,6 +334,19 @@ struct ra_fns { // not be called, even if it's non-NULL). void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, int dst_x, int dst_y, struct mp_rect *src_rc); + + // Compile a shader and create a pipeline. This is a rare operation. + // The params pointer and anything it points to must stay valid until + // renderpass_destroy. + struct ra_renderpass *(*renderpass_create)(struct ra *ra, + const struct ra_renderpass_params *params); + + void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass); + + // Perform a render pass, basically drawing a list of triangles to a FBO. + // This is an extremely common operation. 
+ void (*renderpass_run)(struct ra *ra, + const struct ra_renderpass_run_params *params); }; struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params); diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index e1ecd337e4..7d52063295 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -1,5 +1,6 @@ -#include "formats.h" +#include +#include "formats.h" #include "ra_gl.h" static struct ra_fns ra_fns_gl; @@ -22,6 +23,14 @@ int ra_init_gl(struct ra *ra, GL *gl) ra->caps |= RA_CAP_TEX_3D; if (gl->BlitFramebuffer) ra->caps |= RA_CAP_BLIT; + if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER) + ra->caps |= RA_CAP_COMPUTE; + if (gl->MapBufferRange) + ra->caps |= RA_CAP_PBO; + if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY) + ra->caps |= RA_CAP_NESTED_ARRAY; + ra->glsl_version = gl->glsl_version; + ra->glsl_es = gl->es > 0; int gl_fmt_features = gl_format_feature_flags(gl); @@ -271,6 +280,7 @@ static struct ra_tex *wrap_tex_fbo(struct ra *ra, GLuint gl_obj, bool is_fbo, .render_dst = is_fbo, .render_src = !is_fbo, .non_normalized = gl_target == GL_TEXTURE_RECTANGLE, + .external_oes = gl_target == GL_TEXTURE_EXTERNAL_OES, }, }; @@ -310,6 +320,12 @@ struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h) return wrap_tex_fbo(ra, gl_fbo, true, 0, GL_RGBA, 0, 0, w, h); } +GL *ra_gl_get(struct ra *ra) +{ + struct ra_gl *p = ra->priv; + return p->gl; +} + static void gl_tex_upload(struct ra *ra, struct ra_tex *tex, const void *src, ptrdiff_t stride, struct mp_rect *rc, uint64_t flags, @@ -478,6 +494,367 @@ static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); } +static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + struct ra_gl *p = ra->priv; + struct ra_renderpass_gl *pass_gl = pass->priv; + p->gl->DeleteProgram(pass_gl->program); + gl_vao_uninit(&pass_gl->vao); + + talloc_free(pass_gl); + talloc_free(pass); +} + +static 
const char *shader_typestr(GLenum type) +{ + switch (type) { + case GL_VERTEX_SHADER: return "vertex"; + case GL_FRAGMENT_SHADER: return "fragment"; + case GL_COMPUTE_SHADER: return "compute"; + default: abort(); + } +} + +static void compile_attach_shader(struct ra *ra, GLuint program, + GLenum type, const char *source, bool *ok) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &source, NULL); + gl->CompileShader(shader); + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + const char *typestr = shader_typestr(type); + if (mp_msg_test(ra->log, pri)) { + MP_MSG(ra, pri, "%s shader source:\n", typestr); + mp_log_source(ra->log, pri, source); + } + if (log_length > 1) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n", + typestr, status, logstr); + talloc_free(logstr); + } + if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) { + GLint len = 0; + gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); + if (len > 0) { + GLchar *sstr = talloc_zero_size(NULL, len + 1); + gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); + MP_DBG(ra, "Translated shader:\n"); + mp_log_source(ra->log, MSGL_DEBUG, sstr); + } + } + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + + *ok &= status; +} + +static void link_shader(struct ra *ra, GLuint program, bool *ok) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + gl->LinkProgram(program); + GLint status = 0; + gl->GetProgramiv(program, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? 
MSGL_V : MSGL_DEBUG) : MSGL_ERR; + if (mp_msg_test(ra->log, pri)) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetProgramInfoLog(program, log_length, NULL, logstr); + MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr); + talloc_free(logstr); + } + + *ok &= status; +} + +// either 'compute' or both 'vertex' and 'frag' are needed +static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p) +{ + struct ra_gl *priv = ra->priv; + GL *gl = priv->gl; + + GLuint prog = gl->CreateProgram(); + bool ok = true; + if (p->type == RA_RENDERPASS_TYPE_COMPUTE) + compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok); + if (p->type == RA_RENDERPASS_TYPE_RASTER) { + compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok); + compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok); + for (int n = 0; n < p->num_vertex_attribs; n++) + gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name); + } + link_shader(ra, prog, &ok); + if (!ok) { + gl->DeleteProgram(prog); + prog = 0; + } + return prog; +} + +static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p, + bstr *out_cached_data) +{ + struct ra_gl *priv = ra->priv; + GL *gl = priv->gl; + + GLuint prog = 0; + + if (gl->ProgramBinary && p->cached_program.len > 4) { + GLenum format = AV_RL32(p->cached_program.start); + prog = gl->CreateProgram(); + gl_check_error(gl, ra->log, "before loading program"); + gl->ProgramBinary(prog, format, p->cached_program.start + 4, + p->cached_program.len - 4); + gl->GetError(); // discard potential useless error + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (status) { + MP_VERBOSE(ra, "Loading binary program succeeded.\n"); + } else { + gl->DeleteProgram(prog); + prog = 0; + } + } + + if (!prog) { + prog = compile_program(ra, p); + + if (gl->GetProgramBinary && prog) { + GLint size = 0; + gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, 
&size); + uint8_t *buffer = talloc_size(NULL, size + 4); + GLsizei actual_size = 0; + GLenum binary_format = 0; + gl->GetProgramBinary(prog, size, &actual_size, &binary_format, + buffer + 4); + AV_WL32(buffer, binary_format); + if (actual_size) + *out_cached_data = (bstr){buffer, actual_size + 4}; + } + } + + return prog; +} + +static struct ra_renderpass *gl_renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_render_pass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + struct ra_renderpass_gl *pass_gl = pass->priv = + talloc_zero(NULL, struct ra_renderpass_gl); + + bstr cached = {0}; + pass_gl->program = load_program(ra, params, &cached); + if (!pass_gl->program) { + gl_renderpass_destroy(ra, pass); + return NULL; + } + + talloc_steal(pass, cached.start); + pass->params.cached_program = cached; + + for (int n = 0; n < params->num_inputs; n++) { + GLint loc = + gl->GetUniformLocation(pass_gl->program, params->inputs[n].name); + MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc, + loc); + } + + gl_vao_init(&pass_gl->vao, gl, params->vertex_stride, params->vertex_attribs, + params->num_vertex_attribs); + + pass_gl->first_run = true; + + return pass; +} + +static GLenum map_blend(enum ra_blend blend) +{ + switch (blend) { + case RA_BLEND_ZERO: return GL_ZERO; + case RA_BLEND_ONE: return GL_ONE; + case RA_BLEND_SRC_ALPHA: return GL_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return GL_ONE_MINUS_SRC_ALPHA; + default: return 0; + } +} + +// Assumes program is current (gl->UseProgram(program)). 
+static void update_uniform(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_renderpass_gl *pass_gl = pass->priv; + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc); + GLint loc = pass_gl->uniform_loc[val->index]; + + switch (input->type) { + case RA_VARTYPE_INT: { + assert(input->dim_v * input->dim_m == 1); + if (loc < 0) + break; + gl->Uniform1i(loc, *(int *)val->data); + break; + } + case RA_VARTYPE_FLOAT: { + float *f = val->data; + if (loc < 0) + break; + if (input->dim_m == 1) { + switch (input->dim_v) { + case 1: gl->Uniform1f(loc, f[0]); break; + case 2: gl->Uniform2f(loc, f[0], f[1]); break; + case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break; + case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break; + default: abort(); + } + } else if (input->dim_v == 2 && input->dim_m == 2) { + gl->UniformMatrix2fv(loc, 1, GL_FALSE, f); + } else if (input->dim_v == 3 && input->dim_m == 3) { + gl->UniformMatrix3fv(loc, 1, GL_FALSE, f); + } else { + abort(); + } + break; + } + case RA_VARTYPE_IMG_W: /* fall through */ + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + if (pass_gl->first_run) + gl->Uniform1i(loc, input->binding); + if (input->type == RA_VARTYPE_TEX) { + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, tex_gl->texture); + } else { + gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + } + break; + } + case RA_VARTYPE_SSBO: { + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, + *(int *)val->data); + break; + } + default: + abort(); + } +} + +static void disable_binding(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + struct 
ra_gl *p = ra->priv; + GL *gl = p->gl; + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + + switch (input->type) { + case RA_VARTYPE_IMG_W: /* fall through */ + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + if (input->type == RA_VARTYPE_TEX) { + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, 0); + } else { + gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + } + break; + } + case RA_VARTYPE_SSBO: { + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0); + break; + } + } +} + +static void gl_renderpass_run(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_renderpass *pass = params->pass; + struct ra_renderpass_gl *pass_gl = pass->priv; + + gl->UseProgram(pass_gl->program); + + for (int n = 0; n < params->num_values; n++) + update_uniform(ra, pass, &params->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + switch (pass->params.type) { + case RA_RENDERPASS_TYPE_RASTER: { + struct ra_tex_gl *target_gl = params->target->priv; + assert(params->target->params.render_dst); + gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo); + gl->Viewport(params->viewport.x0, params->viewport.y0, + mp_rect_w(params->viewport), + mp_rect_h(params->viewport)); + gl->Scissor(params->scissors.x0, params->scissors.y0, + mp_rect_w(params->scissors), + mp_rect_h(params->scissors)); + gl->Enable(GL_SCISSOR_TEST); + if (pass->params.enable_blend) { + gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb), + map_blend(pass->params.blend_dst_rgb), + map_blend(pass->params.blend_src_alpha), + map_blend(pass->params.blend_dst_alpha)); + gl->Enable(GL_BLEND); + } + gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data, + params->vertex_count); + gl->Disable(GL_SCISSOR_TEST); +
gl->Disable(GL_BLEND); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + break; + } + case RA_RENDERPASS_TYPE_COMPUTE: { + gl->DispatchCompute(params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + break; + } + default: abort(); + } + + for (int n = 0; n < params->num_values; n++) + disable_binding(ra, pass, &params->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + gl->UseProgram(0); + + pass_gl->first_run = false; +} + static struct ra_fns ra_fns_gl = { .destroy = gl_destroy, .tex_create = gl_tex_create, @@ -488,4 +865,7 @@ static struct ra_fns ra_fns_gl = { .poll_mapped_buffer = gl_poll_mapped_buffer, .clear = gl_clear, .blit = gl_blit, + .renderpass_create = gl_renderpass_create, + .renderpass_destroy = gl_renderpass_destroy, + .renderpass_run = gl_renderpass_run, }; diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h index 016ce13419..0d3828c978 100644 --- a/video/out/opengl/ra_gl.h +++ b/video/out/opengl/ra_gl.h @@ -28,9 +28,21 @@ struct ra_mapped_buffer_gl { GLsync fence; }; +// For ra_renderpass.priv +struct ra_renderpass_gl { + GLuint program; + // 1 entry for each ra_renderpass_params.inputs[] entry + GLint *uniform_loc; + int num_uniform_loc; // == ra_renderpass_params.num_inputs + struct gl_vao vao; + bool first_run; +}; + int ra_init_gl(struct ra *ra, GL *gl); struct ra_tex *ra_create_wrapped_texture(struct ra *ra, GLuint gl_texture, GLenum gl_target, GLint gl_iformat, GLenum gl_format, GLenum gl_type, int w, int h); struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h); + +GL *ra_gl_get(struct ra *ra); diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c index 7f8b37be64..fdfafbeb60 100644 --- a/video/out/opengl/shader_cache.c +++ b/video/out/opengl/shader_cache.c @@ -6,7 +6,6 @@ #include #include -#include #include #include "osdep/io.h" @@ -17,63 +16,37 @@ #include "shader_cache.h" #include "formats.h" #include
"ra_gl.h" -#include "gl_utils.h" // Force cache flush if more than this number of shaders is created. #define SC_MAX_ENTRIES 48 -enum uniform_type { - UT_invalid, - UT_i, - UT_f, - UT_m, -}; - union uniform_val { - GLfloat f[9]; - GLint i[4]; + float f[9]; // RA_VARTYPE_FLOAT + int i[4]; // RA_VARTYPE_INT, RA_VARTYPE_SSBO + struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* }; struct sc_uniform { - char *name; - enum uniform_type type; + struct ra_renderpass_input input; const char *glsl_type; - int size; - GLint loc; union uniform_val v; - // Set for sampler uniforms. - GLenum tex_target; - GLuint tex_handle; - // Set for image uniforms - GLuint img_handle; - GLenum img_access; - GLenum img_iformat; -}; - -struct sc_buffer { - char *name; - char *format; - GLuint binding; - GLuint ssbo; + char *ssbo_format; }; struct sc_cached_uniform { - GLint loc; union uniform_val v; }; struct sc_entry { - GLuint gl_shader; - struct sc_cached_uniform *uniforms; - int num_uniforms; - bstr frag; - bstr vert; - bstr comp; + struct ra_renderpass *pass; + struct sc_cached_uniform *cached_uniforms; + int num_cached_uniforms; + bstr total; struct gl_timer *timer; - struct gl_vao vao; }; struct gl_shader_cache { + struct ra *ra; GL *gl; struct mp_log *log; @@ -88,20 +61,19 @@ struct gl_shader_cache { int next_texture_unit; int next_image_unit; int next_buffer_binding; - struct gl_vao *vao; // deprecated - struct sc_entry *entries; + struct ra_renderpass_params params; + + struct sc_entry **entries; int num_entries; struct sc_entry *current_shader; // set by gl_sc_generate() struct sc_uniform *uniforms; int num_uniforms; - struct sc_buffer *buffers; - int num_buffers; - const struct gl_vao_entry *vertex_entries; - size_t vertex_size; + struct ra_renderpass_input_val *values; + int num_values; // For checking that the user is calling gl_sc_reset() properly. 
bool needs_reset; @@ -109,18 +81,23 @@ struct gl_shader_cache { bool error_state; // true if an error occurred // temporary buffers (avoids frequent reallocations) - bstr tmp[5]; + bstr tmp[6]; // For the disk-cache. char *cache_dir; struct mpv_global *global; // can be NULL }; -struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log) +static void gl_sc_reset(struct gl_shader_cache *sc); + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log) { struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); *sc = (struct gl_shader_cache){ - .gl = gl, + .ra = ra, + .gl = ra_gl_get(ra), + .global = global, .log = log, }; gl_sc_reset(sc); @@ -129,50 +106,24 @@ struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log) // Reset the previous pass. This must be called after // Unbind all GL state managed by sc - the current program and texture units. -void gl_sc_reset(struct gl_shader_cache *sc) +static void gl_sc_reset(struct gl_shader_cache *sc) { GL *gl = sc->gl; - if (sc->needs_reset) { + if (sc->needs_reset) gl_timer_stop(gl); - gl->UseProgram(0); - - for (int n = 0; n < sc->num_uniforms; n++) { - struct sc_uniform *u = &sc->uniforms[n]; - if (u->type == UT_i && u->tex_target) { - gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); - gl->BindTexture(u->tex_target, 0); - } - if (u->type == UT_i && u->img_access) { - gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0, - u->img_access, u->img_iformat); - } - } - gl->ActiveTexture(GL_TEXTURE0); - - for (int n = 0; n < sc->num_buffers; n++) { - struct sc_buffer *b = &sc->buffers[n]; - gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0); - } - } sc->prelude_text.len = 0; sc->header_text.len = 0; sc->text.len = 0; for (int n = 0; n < sc->num_uniforms; n++) - talloc_free(sc->uniforms[n].name); + talloc_free((void *)sc->uniforms[n].input.name); sc->num_uniforms = 0; - for (int n = 0; n < sc->num_buffers; n++) { - talloc_free(sc->buffers[n].name); - 
talloc_free(sc->buffers[n].format); - } - sc->num_buffers = 0; sc->next_texture_unit = 1; // not 0, as 0 is "free for use" sc->next_image_unit = 1; sc->next_buffer_binding = 1; - sc->vertex_entries = NULL; - sc->vertex_size = 0; sc->current_shader = NULL; + sc->params = (struct ra_renderpass_params){0}; sc->needs_reset = false; } @@ -181,14 +132,11 @@ static void sc_flush_cache(struct gl_shader_cache *sc) MP_VERBOSE(sc, "flushing shader cache\n"); for (int n = 0; n < sc->num_entries; n++) { - struct sc_entry *e = &sc->entries[n]; - sc->gl->DeleteProgram(e->gl_shader); - talloc_free(e->vert.start); - talloc_free(e->frag.start); - talloc_free(e->comp.start); - talloc_free(e->uniforms); + struct sc_entry *e = sc->entries[n]; + if (e->pass) + sc->ra->fns->renderpass_destroy(sc->ra, e->pass); gl_timer_free(e->timer); - gl_vao_uninit(&e->vao); + talloc_free(e); } sc->num_entries = 0; } @@ -265,144 +213,102 @@ void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, const char *name) { - for (int n = 0; n < sc->num_uniforms; n++) { - if (strcmp(sc->uniforms[n].name, name) == 0) - return &sc->uniforms[n]; - } - // not found -> add it struct sc_uniform new = { - .loc = -1, - .name = talloc_strdup(NULL, name), - }; - MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); - return &sc->uniforms[sc->num_uniforms - 1]; -} - -static struct sc_buffer *find_buffer(struct gl_shader_cache *sc, - const char *name) -{ - for (int n = 0; n < sc->num_buffers; n++) { - if (strcmp(sc->buffers[n].name, name) == 0) - return &sc->buffers[n]; - } - // not found -> add it - struct sc_buffer new = { - .name = talloc_strdup(NULL, name), + .input = { + .dim_v = 1, + .dim_m = 1, + }, }; - MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, new); - return &sc->buffers[sc->num_buffers - 1]; -} -const char *mp_sampler_type(GLenum texture_target) -{ - switch (texture_target) { - case GL_TEXTURE_1D: return "sampler1D"; - 
case GL_TEXTURE_2D: return "sampler2D"; - case GL_TEXTURE_RECTANGLE: return "sampler2DRect"; - case GL_TEXTURE_EXTERNAL_OES: return "samplerExternalOES"; - case GL_TEXTURE_3D: return "sampler3D"; - default: abort(); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (strcmp(u->input.name, name) == 0) { + const char *allocname = u->input.name; + *u = new; + u->input.name = allocname; + return u; + } } -} -void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target, - GLuint texture) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_i; - u->size = 1; - u->glsl_type = mp_sampler_type(target); - u->v.i[0] = sc->next_texture_unit++; - u->tex_target = target; - u->tex_handle = texture; -} - -void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_i; - u->size = 1; - u->glsl_type = sc->gl->es ? "highp usampler2D" : "usampler2D"; - u->v.i[0] = sc->next_texture_unit++; - u->tex_target = GL_TEXTURE_2D; - u->tex_handle = texture; + // not found -> add it + new.input.name = talloc_strdup(NULL, name); + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; } void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, struct ra_tex *tex) { - struct ra_tex_gl *tex_gl = tex->priv; - if (tex->params.format->ctype == RA_CTYPE_UINT) { - gl_sc_uniform_tex_ui(sc, name, tex_gl->texture); - } else { - gl_sc_uniform_tex(sc, name, tex_gl->target, tex_gl->texture); + const char *glsl_type = "sampler2D"; + if (tex->params.dimensions == 1) { + glsl_type = "sampler1D"; + } else if (tex->params.dimensions == 3) { + glsl_type = "sampler3D"; + } else if (tex->params.non_normalized) { + glsl_type = "sampler2DRect"; + } else if (tex->params.external_oes) { + glsl_type = "samplerExternalOES"; + } else if (tex->params.format->ctype == RA_CTYPE_UINT) { + glsl_type = 
sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; } -} -static const char *mp_image2D_type(GLenum access) -{ - switch (access) { - case GL_WRITE_ONLY: return "writeonly image2D"; - case GL_READ_ONLY: return "readonly image2D"; - case GL_READ_WRITE: return "image2D"; - default: abort(); - } + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_TEX; + u->glsl_type = glsl_type; + u->input.binding = sc->next_texture_unit++; + u->v.tex = tex; } -void gl_sc_uniform_image2D(struct gl_shader_cache *sc, const char *name, - GLuint texture, GLuint iformat, GLenum access) +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex) { gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_i; - u->size = 1; - u->glsl_type = mp_image2D_type(access); - u->v.i[0] = sc->next_image_unit++; - u->img_handle = texture; - u->img_access = access; - u->img_iformat = iformat; + u->input.type = RA_VARTYPE_IMG_W; + u->glsl_type = "writeonly image2D"; + u->input.binding = sc->next_image_unit++; + u->v.tex = tex; } -void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, int gl_ssbo, char *format, ...) 
{ gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); - struct sc_buffer *b = find_buffer(sc, name); - b->binding = sc->next_buffer_binding++; - b->ssbo = ssbo; - b->format = format; + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_SSBO; + u->glsl_type = ""; + u->input.binding = sc->next_buffer_binding++; + u->v.i[0] = gl_ssbo; va_list ap; va_start(ap, format); - b->format = ta_vasprintf(sc, format, ap); + u->ssbo_format = ta_vasprintf(sc, format, ap); va_end(ap); } -void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f) +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) { struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_f; - u->size = 1; + u->input.type = RA_VARTYPE_FLOAT; u->glsl_type = "float"; u->v.f[0] = f; } -void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint i) +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) { struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_i; - u->size = 1; + u->input.type = RA_VARTYPE_INT; u->glsl_type = "int"; u->v.i[0] = i; } -void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]) +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) { struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_f; - u->size = 2; + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; u->glsl_type = "vec2"; u->v.f[0] = f[0]; u->v.f[1] = f[1]; @@ -411,8 +317,8 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]) void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]) { struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_f; - u->size = 3; + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; u->glsl_type = "vec3"; u->v.f[0] = f[0]; u->v.f[1] = f[1]; @@ -428,8 +334,9 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v) { struct sc_uniform *u = find_uniform(sc, 
name); - u->type = UT_m; - u->size = 2; + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->input.dim_m = 2; u->glsl_type = "mat2"; for (int n = 0; n < 4; n++) u->v.f[n] = v[n]; @@ -448,8 +355,9 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v) { struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_m; - u->size = 3; + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->input.dim_m = 3; u->glsl_type = "mat3"; for (int n = 0; n < 9; n++) u->v.f[n] = v[n]; @@ -461,17 +369,33 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, // data layout and attribute names. The entries array is terminated with a {0} // entry. The array memory must remain valid indefinitely (for now). void gl_sc_set_vertex_format(struct gl_shader_cache *sc, - const struct gl_vao_entry *entries, - size_t vertex_size) + const struct ra_renderpass_input *entries, + int vertex_stride) { - sc->vertex_entries = entries; - sc->vertex_size = vertex_size; + sc->params.vertex_attribs = (struct ra_renderpass_input *)entries; + sc->params.num_vertex_attribs = 0; + while (entries[sc->params.num_vertex_attribs].name) + sc->params.num_vertex_attribs++; + sc->params.vertex_stride = vertex_stride; } -static const char *vao_glsl_type(const struct gl_vao_entry *e) +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha) +{ + sc->params.enable_blend = true; + sc->params.blend_src_rgb = blend_src_rgb; + sc->params.blend_dst_rgb = blend_dst_rgb; + sc->params.blend_src_alpha = blend_src_alpha; + sc->params.blend_dst_alpha = blend_dst_alpha; +} + +static const char *vao_glsl_type(const struct ra_renderpass_input *e) { // pretty dumb... 
too dumb, but works for us - switch (e->num_elems) { + switch (e->dim_v) { case 1: return "float"; case 2: return "vec2"; case 3: return "vec3"; @@ -480,165 +404,36 @@ static const char *vao_glsl_type(const struct gl_vao_entry *e) } } -// Assumes program is current (gl->UseProgram(program)). -static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int n) +static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, + struct sc_uniform *u, int n) { - struct sc_cached_uniform *un = &e->uniforms[n]; - GLint loc = un->loc; - if (loc < 0) - return; - switch (u->type) { - case UT_i: - assert(u->size == 1); - if (memcmp(un->v.i, u->v.i, sizeof(u->v.i)) != 0) { - memcpy(un->v.i, u->v.i, sizeof(u->v.i)); - gl->Uniform1i(loc, u->v.i[0]); - } - // For samplers: set the actual texture. - if (u->tex_target) { - gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); - gl->BindTexture(u->tex_target, u->tex_handle); - } - if (u->img_handle) { - gl->BindImageTexture(u->v.i[0], u->img_handle, 0, GL_FALSE, 0, - u->img_access, u->img_iformat); - } - break; - case UT_f: - if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { - memcpy(un->v.f, u->v.f, sizeof(u->v.f)); - switch (u->size) { - case 1: gl->Uniform1f(loc, u->v.f[0]); break; - case 2: gl->Uniform2f(loc, u->v.f[0], u->v.f[1]); break; - case 3: gl->Uniform3f(loc, u->v.f[0], u->v.f[1], u->v.f[2]); break; - case 4: gl->Uniform4f(loc, u->v.f[0], u->v.f[1], u->v.f[2], - u->v.f[3]); break; - default: abort(); - } - } - break; - case UT_m: - if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { - memcpy(un->v.f, u->v.f, sizeof(u->v.f)); - switch (u->size) { - case 2: gl->UniformMatrix2fv(loc, 1, GL_FALSE, &u->v.f[0]); break; - case 3: gl->UniformMatrix3fv(loc, 1, GL_FALSE, &u->v.f[0]); break; - default: abort(); - } - } - break; - default: - abort(); + struct sc_cached_uniform *un = &e->cached_uniforms[n]; + struct ra_renderpass_input *input = &e->pass->params.inputs[n]; + size_t size = 
ra_render_pass_input_data_size(input); + bool changed = true; + if (size > 0) + changed = memcmp(&un->v, &u->v, size) != 0; + + if (changed) { + un->v = u->v; + struct ra_renderpass_input_val value = { + .index = n, + .data = &un->v, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); } } -void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global, - const char *dir) +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir) { talloc_free(sc->cache_dir); sc->cache_dir = talloc_strdup(sc, dir); - sc->global = global; } -static const char *shader_typestr(GLenum type) +static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) { - switch (type) { - case GL_VERTEX_SHADER: return "vertex"; - case GL_FRAGMENT_SHADER: return "fragment"; - case GL_COMPUTE_SHADER: return "compute"; - default: abort(); - } -} - -static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program, - GLenum type, const char *source) -{ - GL *gl = sc->gl; - - GLuint shader = gl->CreateShader(type); - gl->ShaderSource(shader, 1, &source, NULL); - gl->CompileShader(shader); - GLint status = 0; - gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); - GLint log_length = 0; - gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); - - int pri = status ? (log_length > 1 ? 
MSGL_V : MSGL_DEBUG) : MSGL_ERR; - const char *typestr = shader_typestr(type); - if (mp_msg_test(sc->log, pri)) { - MP_MSG(sc, pri, "%s shader source:\n", typestr); - mp_log_source(sc->log, pri, source); - } - if (log_length > 1) { - GLchar *logstr = talloc_zero_size(NULL, log_length + 1); - gl->GetShaderInfoLog(shader, log_length, NULL, logstr); - MP_MSG(sc, pri, "%s shader compile log (status=%d):\n%s\n", - typestr, status, logstr); - talloc_free(logstr); - } - if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(sc->log, MSGL_DEBUG)) { - GLint len = 0; - gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); - if (len > 0) { - GLchar *sstr = talloc_zero_size(NULL, len + 1); - gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); - MP_DBG(sc, "Translated shader:\n"); - mp_log_source(sc->log, MSGL_DEBUG, sstr); - } - } - - gl->AttachShader(program, shader); - gl->DeleteShader(shader); - - if (!status) - sc->error_state = true; -} - -static void link_shader(struct gl_shader_cache *sc, GLuint program) -{ - GL *gl = sc->gl; - gl->LinkProgram(program); - GLint status = 0; - gl->GetProgramiv(program, GL_LINK_STATUS, &status); - GLint log_length = 0; - gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - - int pri = status ? (log_length > 1 ? 
MSGL_V : MSGL_DEBUG) : MSGL_ERR; - if (mp_msg_test(sc->log, pri)) { - GLchar *logstr = talloc_zero_size(NULL, log_length + 1); - gl->GetProgramInfoLog(program, log_length, NULL, logstr); - MP_MSG(sc, pri, "shader link log (status=%d): %s\n", status, logstr); - talloc_free(logstr); - } - - if (!status) - sc->error_state = true; -} - -// either 'compute' or both 'vertex' and 'frag' are needed -static GLuint compile_program(struct gl_shader_cache *sc, struct bstr *vertex, - struct bstr *frag, struct bstr *compute) -{ - GL *gl = sc->gl; - GLuint prog = gl->CreateProgram(); - if (compute) - compile_attach_shader(sc, prog, GL_COMPUTE_SHADER, compute->start); - if (vertex && frag) { - compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex->start); - compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag->start); - for (int n = 0; sc->vertex_entries[n].name; n++) { - char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name); - gl->BindAttribLocation(prog, n, vname); - } - } - link_shader(sc, prog); - return prog; -} - -static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex, - struct bstr *frag, struct bstr *compute) -{ - GL *gl = sc->gl; + void *tmp = talloc_new(NULL); + struct ra_renderpass_params params = sc->params; MP_VERBOSE(sc, "new shader program:\n"); if (sc->header_text.len) { @@ -649,98 +444,94 @@ static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex, if (sc->text.len) mp_log_source(sc->log, MSGL_V, sc->text.start); - if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary) - return compile_program(sc, vertex, frag, compute); - - // Try to load it from a disk cache, or compiling + saving it. 
- - GLuint prog = 0; - void *tmp = talloc_new(NULL); - char *dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); - - struct AVSHA *sha = av_sha_alloc(); - if (!sha) - abort(); - av_sha_init(sha, 256); - - if (vertex) - av_sha_update(sha, vertex->start, vertex->len + 1); - if (frag) - av_sha_update(sha, frag->start, frag->len + 1); - if (compute) - av_sha_update(sha, compute->start, compute->len + 1); - - // In theory, the array could change order, breaking old binaries. - for (int n = 0; sc->vertex_entries[n].name; n++) { - av_sha_update(sha, sc->vertex_entries[n].name, - strlen(sc->vertex_entries[n].name) + 1); + // The vertex shader uses mangled names for the vertex attributes, so that + // the fragment shader can use the "real" names. But the shader is expecting + // the vertex attribute names (at least with older GLSL targets for GL). + params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs, + params.num_vertex_attribs * sizeof(params.vertex_attribs[0])); + for (int n = 0; n < params.num_vertex_attribs; n++) { + struct ra_renderpass_input *attrib = &params.vertex_attribs[n]; + attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name); } - uint8_t hash[256 / 8]; - av_sha_final(sha, hash); - av_free(sha); - - char hashstr[256 / 8 * 2 + 1]; - for (int n = 0; n < 256 / 8; n++) - snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); - - const char *header = "mpv shader cache v1\n"; - size_t header_size = strlen(header) + 4; - - char *filename = mp_path_join(tmp, dir, hashstr); - if (stat(filename, &(struct stat){0}) == 0) { - MP_VERBOSE(sc, "Trying to load shader from disk...\n"); - struct bstr cachedata = stream_read_file(filename, tmp, sc->global, - 1000000000); // 1 GB - if (cachedata.len > header_size) { - GLenum format = AV_RL32(cachedata.start + header_size - 4); - prog = gl->CreateProgram(); - gl_check_error(gl, sc->log, "before loading program"); - gl->ProgramBinary(prog, format, cachedata.start + header_size, - cachedata.len -
header_size); - gl->GetError(); // discard potential useless error - GLint status = 0; - gl->GetProgramiv(prog, GL_LINK_STATUS, &status); - if (!status) { - gl->DeleteProgram(prog); - prog = 0; - } + const char *cache_header = "mpv shader cache v1\n"; + char *cache_filename = NULL; + char *cache_dir = NULL; + + if (sc->cache_dir && sc->cache_dir[0]) { + // Try to load it from a disk cache. + cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); + + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + av_sha_update(sha, entry->total.start, entry->total.len); + + uint8_t hash[256 / 8]; + av_sha_final(sha, hash); + av_free(sha); + + char hashstr[256 / 8 * 2 + 1]; + for (int n = 0; n < 256 / 8; n++) + snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); + + cache_filename = mp_path_join(tmp, cache_dir, hashstr); + if (stat(cache_filename, &(struct stat){0}) == 0) { + MP_VERBOSE(sc, "Trying to load shader from disk...\n"); + struct bstr cachedata = + stream_read_file(cache_filename, tmp, sc->global, 1000000000); + if (bstr_eatstart0(&cachedata, cache_header)) + params.cached_program = cachedata; } - MP_VERBOSE(sc, "Loading cached shader %s.\n", prog ? 
"ok" : "failed"); } - if (!prog) { - prog = compile_program(sc, vertex, frag, compute); + entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params); - GLint size = 0; - gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size); - uint8_t *buffer = talloc_size(tmp, size + header_size); - GLsizei actual_size = 0; - GLenum binary_format = 0; - gl->GetProgramBinary(prog, size, &actual_size, &binary_format, - buffer + header_size); - memcpy(buffer, header, header_size - 4); - AV_WL32(buffer + header_size - 4, binary_format); + if (!entry->pass) + sc->error_state = true; - if (actual_size) { - mp_mkdirp(dir); + if (entry->pass && cache_filename) { + bstr nc = entry->pass->params.cached_program; + if (nc.len && !bstr_equals(params.cached_program, nc)) { + mp_mkdirp(cache_dir); - MP_VERBOSE(sc, "Writing shader cache file: %s\n", filename); - FILE *out = fopen(filename, "wb"); + MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename); + FILE *out = fopen(cache_filename, "wb"); if (out) { - fwrite(buffer, header_size + actual_size, 1, out); + fwrite(cache_header, strlen(cache_header), 1, out); + fwrite(nc.start, nc.len, 1, out); fclose(out); } } } talloc_free(tmp); - return prog; } #define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__) #define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) +static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) +{ + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + switch (u->input.type) { + case RA_VARTYPE_INT: + case RA_VARTYPE_FLOAT: + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); + break; + case RA_VARTYPE_SSBO: + ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n", + u->input.binding, u->input.name, u->ssbo_format); + break; + default: abort(); + } + } +} + // 1. Generate vertex and fragment shaders from the fragment shader text added // with gl_sc_add().
The generated shader program is cached (based on the // text), so actual compilation happens only the first time. @@ -753,39 +544,44 @@ static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex, // The return value is a mp_pass_perf containing performance metrics for the // execution of the generated shader. (Note: execution is measured up until // the corresponding gl_sc_reset call) -// 'type' can be either GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER -struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type) +// 'type' must be valid +static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, + enum ra_renderpass_type type) { - GL *gl = sc->gl; + int glsl_version = sc->ra->glsl_version; + int glsl_es = sc->ra->glsl_es ? glsl_version : 0; + + sc->params.type = type; // gl_sc_reset() must be called after ending the previous render process, // and before starting a new one.