diff options
Diffstat (limited to 'video')
-rw-r--r-- | video/out/opengl/gl_utils.c | 26 | ||||
-rw-r--r-- | video/out/opengl/gl_utils.h | 18 | ||||
-rw-r--r-- | video/out/opengl/osd.c | 39 | ||||
-rw-r--r-- | video/out/opengl/osd.h | 3 | ||||
-rw-r--r-- | video/out/opengl/ra.c | 45 | ||||
-rw-r--r-- | video/out/opengl/ra.h | 164 | ||||
-rw-r--r-- | video/out/opengl/ra_gl.c | 382 | ||||
-rw-r--r-- | video/out/opengl/ra_gl.h | 12 | ||||
-rw-r--r-- | video/out/opengl/shader_cache.c | 809 | ||||
-rw-r--r-- | video/out/opengl/shader_cache.h | 52 | ||||
-rw-r--r-- | video/out/opengl/video.c | 83 |
11 files changed, 1016 insertions, 617 deletions
diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c index c870756b1e..df6f0543ad 100644 --- a/video/out/opengl/gl_utils.c +++ b/video/out/opengl/gl_utils.c @@ -150,17 +150,32 @@ static void gl_vao_enable_attribs(struct gl_vao *vao) { GL *gl = vao->gl; - for (int n = 0; vao->entries[n].name; n++) { - const struct gl_vao_entry *e = &vao->entries[n]; + for (int n = 0; n < vao->num_entries; n++) { + const struct ra_renderpass_input *e = &vao->entries[n]; + GLenum type = 0; + bool normalized = false; + switch (e->type) { + case RA_VARTYPE_FLOAT: + type = GL_FLOAT; + break; + case RA_VARTYPE_BYTE_UNORM: + type = GL_UNSIGNED_BYTE; + normalized = true; + break; + default: + abort(); + } + assert(e->dim_m == 1); gl->EnableVertexAttribArray(n); - gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized, - vao->stride, (void *)(intptr_t)e->offset); + gl->VertexAttribPointer(n, e->dim_v, type, normalized, + vao->stride, (void *)(intptr_t)e->binding); } } void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct gl_vao_entry *entries) + const struct ra_renderpass_input *entries, + int num_entries) { assert(!vao->vao); assert(!vao->buffer); @@ -169,6 +184,7 @@ void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, .gl = gl, .stride = stride, .entries = entries, + .num_entries = num_entries, }; gl->GenBuffers(1, &vao->buffer); diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h index 5ae8d1590b..6192a6b312 100644 --- a/video/out/opengl/gl_utils.h +++ b/video/out/opengl/gl_utils.h @@ -34,32 +34,22 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); -const char* mp_sampler_type(GLenum texture_target); - // print a multi line string with line numbers (e.g. 
for shader sources) // log, lev: module and log level, as in mp_msg() void mp_log_source(struct mp_log *log, int lev, const char *src); -struct gl_vao_entry { - // used for shader / glBindAttribLocation - const char *name; - // glVertexAttribPointer() arguments - int num_elems; // size (number of elements) - GLenum type; - bool normalized; - int offset; -}; - struct gl_vao { GL *gl; GLuint vao; // the VAO object, or 0 if unsupported by driver GLuint buffer; // GL_ARRAY_BUFFER used for the data int stride; // size of each element (interleaved elements are assumed) - const struct gl_vao_entry *entries; + const struct ra_renderpass_input *entries; + int num_entries; }; void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct gl_vao_entry *entries); + const struct ra_renderpass_input *entries, + int num_entries); void gl_vao_uninit(struct gl_vao *vao); void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index 89820693ab..a656451c2e 100644 --- a/video/out/opengl/osd.c +++ b/video/out/opengl/osd.c @@ -22,17 +22,16 @@ #include <libavutil/common.h> #include "formats.h" -#include "ra_gl.h" #include "osd.h" #define GLSL(x) gl_sc_add(sc, #x "\n"); // glBlendFuncSeparate() arguments static const int blend_factors[SUBBITMAP_COUNT][4] = { - [SUBBITMAP_LIBASS] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, - GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, - [SUBBITMAP_RGBA] = {GL_ONE, GL_ONE_MINUS_SRC_ALPHA, - GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, }; struct vertex { @@ -41,10 +40,10 @@ struct vertex { uint8_t ass_color[4]; }; -static const struct gl_vao_entry vertex_vao[] = { - {"position", 2, GL_FLOAT, false, offsetof(struct vertex, position)}, - {"texcoord" , 2, GL_FLOAT, 
false, offsetof(struct vertex, texcoord)}, - {"ass_color", 4, GL_UNSIGNED_BYTE, true, offsetof(struct vertex, ass_color)}, +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, + {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, {0} }; @@ -53,7 +52,6 @@ struct mpgl_osd_part { int change_id; struct ra_tex *texture; int w, h; - struct gl_pbo_upload pbo; int num_subparts; int prev_num_subparts; struct sub_bitmap *subparts; @@ -65,7 +63,6 @@ struct mpgl_osd { struct mp_log *log; struct osd_state *osd; struct ra *ra; - GL *gl; struct mpgl_osd_part *parts[MAX_OSD_PARTS]; const struct ra_format *fmt_table[SUBBITMAP_COUNT]; bool formats[SUBBITMAP_COUNT]; @@ -79,14 +76,11 @@ struct mpgl_osd { struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, struct osd_state *osd) { - struct ra_gl *ra_gl = ra->priv; - struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); *ctx = (struct mpgl_osd) { .log = log, .osd = osd, .ra = ra, - .gl = ra_gl->gl, .scratch = talloc_zero_size(ctx, 1), }; @@ -289,9 +283,8 @@ static void get_3d_side_by_side(int stereo_mode, int div[2]) } void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, - struct gl_shader_cache *sc) + struct gl_shader_cache *sc, struct ra_tex *target) { - GL *gl = ctx->gl; struct mpgl_osd_part *part = ctx->parts[index]; int div[2]; @@ -313,20 +306,10 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, } } - if (!part->num_vertices) - return; - - gl->Enable(GL_BLEND); - const int *factors = &blend_factors[part->format][0]; - gl->BlendFuncSeparate(factors[0], factors[1], factors[2], factors[3]); - - ctx->gl->Viewport(0, 0, vp_w, abs(vp_h)); - - gl_sc_draw_data(sc, GL_TRIANGLES, part->vertices, part->num_vertices); + gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); - 
gl->BindTexture(GL_TEXTURE_2D, 0); - gl->Disable(GL_BLEND); + gl_sc_dispatch_draw(sc, target, part->vertices, part->num_vertices); } static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h index a0aa104a48..08c143ac0e 100644 --- a/video/out/opengl/osd.h +++ b/video/out/opengl/osd.h @@ -5,7 +5,6 @@ #include <inttypes.h> #include "utils.h" -#include "gl_utils.h" #include "shader_cache.h" #include "sub/osd.h" @@ -19,7 +18,7 @@ void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mod bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, struct gl_shader_cache *sc); void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index, - struct gl_shader_cache *sc); + struct gl_shader_cache *sc, struct ra_tex *target); int64_t mpgl_get_change_counter(struct mpgl_osd *ctx); #endif diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c index 9096a50148..e67dd6ebdf 100644 --- a/video/out/opengl/ra.c +++ b/video/out/opengl/ra.c @@ -16,6 +16,51 @@ void ra_tex_free(struct ra *ra, struct ra_tex **tex) *tex = NULL; } +static size_t vartype_size(enum ra_vartype type) +{ + switch (type) { + case RA_VARTYPE_INT: return sizeof(int); + case RA_VARTYPE_FLOAT: return sizeof(float); + case RA_VARTYPE_BYTE_UNORM: return 1; + default: return 0; + } +} + +// Return the size of the data ra_renderpass_input_val.data is going to point +// to. This returns 0 for non-primitive types such as textures. 
+size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input) +{ + size_t el_size = vartype_size(input->type); + return el_size * input->dim_v * input->dim_m; +} + +static struct ra_renderpass_input *dup_inputs(void *ta_parent, + const struct ra_renderpass_input *inputs, int num_inputs) +{ + struct ra_renderpass_input *res = + talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0])); + for (int n = 0; n < num_inputs; n++) + res[n].name = talloc_strdup(res, res[n].name); + return res; +} + +// Return a newly allocated deep-copy of params. +struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); + *res = *params; + res->inputs = dup_inputs(res, res->inputs, res->num_inputs); + res->vertex_attribs = + dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); + res->cached_program = bstrdup(res, res->cached_program); + res->vertex_shader = talloc_strdup(res, res->vertex_shader); + res->frag_shader = talloc_strdup(res, res->frag_shader); + res->compute_shader = talloc_strdup(res, res->compute_shader); + return res; +}; + + // Return whether this is a tightly packed format with no external padding and // with the same bit size/depth in all components. static bool ra_format_is_regular(const struct ra_format *fmt) diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h index ab3027b78a..75ba0e5fe7 100644 --- a/video/out/opengl/ra.h +++ b/video/out/opengl/ra.h @@ -1,12 +1,16 @@ #pragma once #include "common/common.h" +#include "misc/bstr.h" // Handle for a rendering API backend. struct ra { struct ra_fns *fns; void *priv; + int glsl_version; // GLSL version (e.g. 300 => 3.0) + bool glsl_es; // use ES dialect + struct mp_log *log; // RA_CAP_* bit field. 
The RA backend must set supported features at init @@ -31,6 +35,9 @@ enum { RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader source textures) RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader source textures) RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit + RA_CAP_COMPUTE = 1 << 3, // supports compute shaders + RA_CAP_PBO = 1 << 4, // supports ra.use_pbo + RA_CAP_NESTED_ARRAY = 1 << 5, }; enum ra_ctype { @@ -81,6 +88,7 @@ struct ra_tex_params { // if true, repeat texture coordinates bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy // always set to false, except in OSX code + bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy // If non-NULL, the texture will be created with these contents, and is // considered immutable afterwards (no upload, mapping, or rendering to it). void *initial_data; @@ -108,6 +116,149 @@ struct ra_mapped_buffer { size_t size; // total size of the mapping, starting at data }; +// Type of a shader uniform variable, or a vertex attribute. In all cases, +// vectors and matrices are done by having more than 1 value. +enum ra_vartype { + RA_VARTYPE_INVALID, + RA_VARTYPE_INT, // C: int, GLSL: int, ivec* + RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat* + RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types + // ra_tex.params.render_src must be true + RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types + // write-only (W) image for compute shaders + RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only) + RA_VARTYPE_SSBO, // a hack for GL +}; + +// Represents a uniform, texture input parameter, and similar things. +struct ra_renderpass_input { + const char *name; // name as used in the shader + enum ra_vartype type; + // The total number of values is given by dim_v * dim_m.
+ int dim_v; // vector dimension (1 for non-vector and non-matrix) + int dim_m; // additional matrix dimension (dim_v x dim_m) + // Vertex data: byte offset of the attribute into the vertex struct + // RA_VARTYPE_TEX: texture unit + // RA_VARTYPE_IMG_W: image unit + // RA_VARTYPE_SSBO: whatever? + // Other uniforms: unused + int binding; +}; + +size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input); + +enum ra_blend { + RA_BLEND_ZERO, + RA_BLEND_ONE, + RA_BLEND_SRC_ALPHA, + RA_BLEND_ONE_MINUS_SRC_ALPHA, +}; + +enum ra_renderpass_type { + RA_RENDERPASS_TYPE_INVALID, + RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader + RA_RENDERPASS_TYPE_COMPUTE, // compute shader +}; + +// Static part of a rendering pass. It conflates the following: +// - compiled shader and its list of uniforms +// - vertex attributes and its shader mappings +// - blending parameters +// (For Vulkan, this would be shader module + pipeline state.) +// Upon creation, the values of dynamic values such as uniform contents (whose +// initial values are not provided here) are required to be 0. +struct ra_renderpass_params { + enum ra_renderpass_type type; + + // Uniforms, including texture/sampler inputs. + struct ra_renderpass_input *inputs; + int num_inputs; + + // Highly implementation-specific byte array storing a compiled version + // of the program. Can be used to speed up shader compilation. A backend + // can read this in renderpass_create, or set this on the newly created + // ra_renderpass params field. + bstr cached_program; + + // --- type==RA_RENDERPASS_TYPE_RASTER only + + // Describes the format of the vertex data. + struct ra_renderpass_input *vertex_attribs; + int num_vertex_attribs; + int vertex_stride; + + // Shader text, in GLSL. (Yes, you need a GLSL compiler.) + // These are complete shaders, including prelude and declarations. + const char *vertex_shader; + const char *frag_shader; + + // Target blending mode.
If enable_blend is false, the blend_ fields can + // be ignored. + bool enable_blend; + enum ra_blend blend_src_rgb; + enum ra_blend blend_dst_rgb; + enum ra_blend blend_src_alpha; + enum ra_blend blend_dst_alpha; + + // --- type==RA_RENDERPASS_TYPE_COMPUTE only + + // Shader text, like vertex_shader/frag_shader. + const char *compute_shader; +}; + +struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params); + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all uniforms (this one makes it relatively stateful +// from an API perspective) +struct ra_renderpass { + // All fields are read-only after creation. + struct ra_renderpass_params params; + void *priv; +}; + +// An input value (see ra_renderpass_input). +struct ra_renderpass_input_val { + int index; // index into ra_renderpass_params.inputs[] + void *data; // pointer to data according to ra_renderpass_input + // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9]) +}; + +// Parameters for performing a rendering pass (basically the dynamic params). +// These change potentially every time. +struct ra_renderpass_run_params { + struct ra_renderpass *pass; + + // Generally this lists parameters only which changed since the last + // invocation and need to be updated. The ra_renderpass instance is + // supposed to keep unchanged values from the previous run. + // For non-primitive types like textures, these entries are always added, + // even if they do not change. + struct ra_renderpass_input_val *values; + int num_values; + + // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only + + // target->params.render_dst must be true. + struct ra_tex *target; + struct mp_rect viewport; + struct mp_rect scissors; + + // (The primitive type is always a triangle list.) 
+ void *vertex_data; + int vertex_count; // number of vertex elements, not bytes + + // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only + + // Number of work groups to be run in X/Y/Z dimensions. + int compute_groups[3]; +}; + enum { // Flags for the texture_upload flags parameter. RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region @@ -183,6 +334,19 @@ struct ra_fns { // not be called, even if it's non-NULL). void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, int dst_x, int dst_y, struct mp_rect *src_rc); + + // Compile a shader and create a pipeline. This is a rare operation. + // The params pointer and anything it points to must stay valid until + // renderpass_destroy. + struct ra_renderpass *(*renderpass_create)(struct ra *ra, + const struct ra_renderpass_params *params); + + void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass); + + // Perform a render pass, basically drawing a list of triangles to a FBO. + // This is an extremely common operation. 
+ void (*renderpass_run)(struct ra *ra, + const struct ra_renderpass_run_params *params); }; struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params); diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index e1ecd337e4..7d52063295 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -1,5 +1,6 @@ -#include "formats.h" +#include <libavutil/intreadwrite.h> +#include "formats.h" #include "ra_gl.h" static struct ra_fns ra_fns_gl; @@ -22,6 +23,14 @@ int ra_init_gl(struct ra *ra, GL *gl) ra->caps |= RA_CAP_TEX_3D; if (gl->BlitFramebuffer) ra->caps |= RA_CAP_BLIT; + if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER) + ra->caps |= RA_CAP_COMPUTE; + if (gl->MapBufferRange) + ra->caps |= RA_CAP_PBO; + if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY) + ra->caps |= RA_CAP_NESTED_ARRAY; + ra->glsl_version = gl->glsl_version; + ra->glsl_es = gl->es > 0; int gl_fmt_features = gl_format_feature_flags(gl); @@ -271,6 +280,7 @@ static struct ra_tex *wrap_tex_fbo(struct ra *ra, GLuint gl_obj, bool is_fbo, .render_dst = is_fbo, .render_src = !is_fbo, .non_normalized = gl_target == GL_TEXTURE_RECTANGLE, + .external_oes = gl_target == GL_TEXTURE_EXTERNAL_OES, }, }; @@ -310,6 +320,12 @@ struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h) return wrap_tex_fbo(ra, gl_fbo, true, 0, GL_RGBA, 0, 0, w, h); } +GL *ra_gl_get(struct ra *ra) +{ + struct ra_gl *p = ra->priv; + return p->gl; +} + static void gl_tex_upload(struct ra *ra, struct ra_tex *tex, const void *src, ptrdiff_t stride, struct mp_rect *rc, uint64_t flags, @@ -478,6 +494,367 @@ static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); } +static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + struct ra_gl *p = ra->priv; + struct ra_renderpass_gl *pass_gl = pass->priv; + p->gl->DeleteProgram(pass_gl->program); + gl_vao_uninit(&pass_gl->vao); + + talloc_free(pass_gl); + 
talloc_free(pass); +} + +static const char *shader_typestr(GLenum type) +{ + switch (type) { + case GL_VERTEX_SHADER: return "vertex"; + case GL_FRAGMENT_SHADER: return "fragment"; + case GL_COMPUTE_SHADER: return "compute"; + default: abort(); + } +} + +static void compile_attach_shader(struct ra *ra, GLuint program, + GLenum type, const char *source, bool *ok) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &source, NULL); + gl->CompileShader(shader); + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + const char *typestr = shader_typestr(type); + if (mp_msg_test(ra->log, pri)) { + MP_MSG(ra, pri, "%s shader source:\n", typestr); + mp_log_source(ra->log, pri, source); + } + if (log_length > 1) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n", + typestr, status, logstr); + talloc_free(logstr); + } + if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) { + GLint len = 0; + gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); + if (len > 0) { + GLchar *sstr = talloc_zero_size(NULL, len + 1); + gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); + MP_DBG(ra, "Translated shader:\n"); + mp_log_source(ra->log, MSGL_DEBUG, sstr); + } + } + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + + *ok &= status; +} + +static void link_shader(struct ra *ra, GLuint program, bool *ok) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + gl->LinkProgram(program); + GLint status = 0; + gl->GetProgramiv(program, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + + int 
pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + if (mp_msg_test(ra->log, pri)) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetProgramInfoLog(program, log_length, NULL, logstr); + MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr); + talloc_free(logstr); + } + + *ok &= status; +} + +// either 'compute' or both 'vertex' and 'frag' are needed +static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p) +{ + struct ra_gl *priv = ra->priv; + GL *gl = priv->gl; + + GLuint prog = gl->CreateProgram(); + bool ok = true; + if (p->type == RA_RENDERPASS_TYPE_COMPUTE) + compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok); + if (p->type == RA_RENDERPASS_TYPE_RASTER) { + compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok); + compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok); + for (int n = 0; n < p->num_vertex_attribs; n++) + gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name); + } + link_shader(ra, prog, &ok); + if (!ok) { + gl->DeleteProgram(prog); + prog = 0; + } + return prog; +} + +static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p, + bstr *out_cached_data) +{ + struct ra_gl *priv = ra->priv; + GL *gl = priv->gl; + + GLuint prog = 0; + + if (gl->ProgramBinary && p->cached_program.len > 4) { + GLenum format = AV_RL32(p->cached_program.start); + prog = gl->CreateProgram(); + gl_check_error(gl, ra->log, "before loading program"); + gl->ProgramBinary(prog, format, p->cached_program.start + 4, + p->cached_program.len - 4); + gl->GetError(); // discard potential useless error + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (status) { + MP_VERBOSE(ra, "Loading binary program succeeded.\n"); + } else { + gl->DeleteProgram(prog); + prog = 0; + } + } + + if (!prog) { + prog = compile_program(ra, p); + + if (gl->GetProgramBinary && prog) { + GLint size = 0; + 
gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size); + uint8_t *buffer = talloc_size(NULL, size + 4); + GLsizei actual_size = 0; + GLenum binary_format = 0; + gl->GetProgramBinary(prog, size, &actual_size, &binary_format, + buffer + 4); + AV_WL32(buffer, binary_format); + if (actual_size) + *out_cached_data = (bstr){buffer, actual_size + 4}; + } + } + + return prog; +} + +static struct ra_renderpass *gl_renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_render_pass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + struct ra_renderpass_gl *pass_gl = pass->priv = + talloc_zero(NULL, struct ra_renderpass_gl); + + bstr cached = {0}; + pass_gl->program = load_program(ra, params, &cached); + if (!pass_gl->program) { + gl_renderpass_destroy(ra, pass); + return NULL; + } + + talloc_steal(pass, cached.start); + pass->params.cached_program = cached; + + for (int n = 0; n < params->num_inputs; n++) { + GLint loc = + gl->GetUniformLocation(pass_gl->program, params->inputs[n].name); + MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc, + loc); + } + + gl_vao_init(&pass_gl->vao, gl, params->vertex_stride, params->vertex_attribs, + params->num_vertex_attribs); + + pass_gl->first_run = true; + + return pass; +} + +static GLenum map_blend(enum ra_blend blend) +{ + switch (blend) { + case RA_BLEND_ZERO: return GL_ZERO; + case RA_BLEND_ONE: return GL_ONE; + case RA_BLEND_SRC_ALPHA: return GL_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return GL_ONE_MINUS_SRC_ALPHA; + default: return 0; + } +} + +// Assumes program is current (gl->UseProgram(program)). 
+static void update_uniform(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_renderpass_gl *pass_gl = pass->priv; + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc); + GLint loc = pass_gl->uniform_loc[val->index]; + + switch (input->type) { + case RA_VARTYPE_INT: { + assert(input->dim_v * input->dim_m == 1); + if (loc < 0) + break; + gl->Uniform1i(loc, *(int *)val->data); + break; + } + case RA_VARTYPE_FLOAT: { + float *f = val->data; + if (loc < 0) + break; + if (input->dim_m == 1) { + switch (input->dim_v) { + case 1: gl->Uniform1f(loc, f[0]); break; + case 2: gl->Uniform2f(loc, f[0], f[1]); break; + case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break; + case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break; + default: abort(); + } + } else if (input->dim_v == 2 && input->dim_m == 2) { + gl->UniformMatrix2fv(loc, 1, GL_FALSE, f); + } else if (input->dim_v == 3 && input->dim_m == 3) { + gl->UniformMatrix3fv(loc, 1, GL_FALSE, f); + } else { + abort(); + } + break; + } + case RA_VARTYPE_IMG_W: /* fall through */ + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + if (pass_gl->first_run) + gl->Uniform1i(loc, input->binding); + if (input->type == RA_VARTYPE_TEX) { + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, tex_gl->texture); + } else { + gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + } + break; + } + case RA_VARTYPE_SSBO: { + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, + *(int *)val->data); + break; + } + default: + abort(); + } +} + +static void disable_binding(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + struct 
ra_gl *p = ra->priv; + GL *gl = p->gl; + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + + switch (input->type) { + case RA_VARTYPE_IMG_W: /* fall through */ + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + if (input->type == RA_VARTYPE_TEX) { + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, 0); + } else { + gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + } + break; + } + case RA_VARTYPE_SSBO: { + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0); + break; + } + } +} + +static void gl_renderpass_run(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_renderpass *pass = params->pass; + struct ra_renderpass_gl *pass_gl = pass->priv; + + gl->UseProgram(pass_gl->program); + + for (int n = 0; n < params->num_values; n++) + update_uniform(ra, pass, &params->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + switch (pass->params.type) { + case RA_RENDERPASS_TYPE_RASTER: { + struct ra_tex_gl *target_gl = params->target->priv; + assert(params->target->params.render_dst); + gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo); + gl->Viewport(params->viewport.x0, params->viewport.y0, + mp_rect_w(params->viewport), + mp_rect_h(params->viewport)); + gl->Scissor(params->scissors.x0, params->scissors.y0, + mp_rect_w(params->scissors), + mp_rect_h(params->scissors)); + gl->Enable(GL_SCISSOR_TEST); + if (pass->params.enable_blend) { + gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb), + map_blend(pass->params.blend_dst_rgb), + map_blend(pass->params.blend_src_alpha), + map_blend(pass->params.blend_dst_alpha)); + gl->Enable(GL_BLEND); + } + gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data, + params->vertex_count); + gl->Disable(GL_SCISSOR_TEST); +
gl->Disable(GL_BLEND); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + break; + } + case RA_RENDERPASS_TYPE_COMPUTE: { + gl->DispatchCompute(params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + break; + } + default: abort(); + } + + for (int n = 0; n < params->num_values; n++) + disable_binding(ra, pass, &params->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + gl->UseProgram(0); + + pass_gl->first_run = false; +} + static struct ra_fns ra_fns_gl = { .destro |