summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2017-08-05 14:20:14 +0200
committerwm4 <wm4@nowhere>2017-08-05 16:27:09 +0200
commit333cae74ef0fa62e0355e85d21f0f41ced3963e7 (patch)
tree08e390dd9bdf7e997bf91ebef5bd5579336f27eb
parentf72a33d2cb223df921967cfe7ae203f882ba6921 (diff)
downloadmpv-333cae74ef0fa62e0355e85d21f0f41ced3963e7.tar.bz2
mpv-333cae74ef0fa62e0355e85d21f0f41ced3963e7.tar.xz
vo_opengl: move shader handling to ra
Now all GL-specifics of shader compilation are abstracted through ra. Of course we still have everything hardcoded to GLSL - that isn't going to change. Some things will probably change later - in particular, the way we pass uniforms and textures to the shader. Currently, there is a confusing mismatch between "primitive" uniforms like floats, and others like textures. Also, SSBOs are not abstracted yet.
-rw-r--r--video/out/opengl/gl_utils.c26
-rw-r--r--video/out/opengl/gl_utils.h18
-rw-r--r--video/out/opengl/osd.c39
-rw-r--r--video/out/opengl/osd.h3
-rw-r--r--video/out/opengl/ra.c45
-rw-r--r--video/out/opengl/ra.h164
-rw-r--r--video/out/opengl/ra_gl.c382
-rw-r--r--video/out/opengl/ra_gl.h12
-rw-r--r--video/out/opengl/shader_cache.c809
-rw-r--r--video/out/opengl/shader_cache.h52
-rw-r--r--video/out/opengl/video.c83
11 files changed, 1016 insertions, 617 deletions
diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c
index c870756b1e..df6f0543ad 100644
--- a/video/out/opengl/gl_utils.c
+++ b/video/out/opengl/gl_utils.c
@@ -150,17 +150,32 @@ static void gl_vao_enable_attribs(struct gl_vao *vao)
{
GL *gl = vao->gl;
- for (int n = 0; vao->entries[n].name; n++) {
- const struct gl_vao_entry *e = &vao->entries[n];
+ for (int n = 0; n < vao->num_entries; n++) {
+ const struct ra_renderpass_input *e = &vao->entries[n];
+ GLenum type = 0;
+ bool normalized = false;
+ switch (e->type) {
+ case RA_VARTYPE_FLOAT:
+ type = GL_FLOAT;
+ break;
+ case RA_VARTYPE_BYTE_UNORM:
+ type = GL_UNSIGNED_BYTE;
+ normalized = true;
+ break;
+ default:
+ abort();
+ }
+ assert(e->dim_m == 1);
gl->EnableVertexAttribArray(n);
- gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized,
- vao->stride, (void *)(intptr_t)e->offset);
+ gl->VertexAttribPointer(n, e->dim_v, type, normalized,
+ vao->stride, (void *)(intptr_t)e->binding);
}
}
void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
- const struct gl_vao_entry *entries)
+ const struct ra_renderpass_input *entries,
+ int num_entries)
{
assert(!vao->vao);
assert(!vao->buffer);
@@ -169,6 +184,7 @@ void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
.gl = gl,
.stride = stride,
.entries = entries,
+ .num_entries = num_entries,
};
gl->GenBuffers(1, &vao->buffer);
diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h
index 5ae8d1590b..6192a6b312 100644
--- a/video/out/opengl/gl_utils.h
+++ b/video/out/opengl/gl_utils.h
@@ -34,32 +34,22 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h);
-const char* mp_sampler_type(GLenum texture_target);
-
// print a multi line string with line numbers (e.g. for shader sources)
// log, lev: module and log level, as in mp_msg()
void mp_log_source(struct mp_log *log, int lev, const char *src);
-struct gl_vao_entry {
- // used for shader / glBindAttribLocation
- const char *name;
- // glVertexAttribPointer() arguments
- int num_elems; // size (number of elements)
- GLenum type;
- bool normalized;
- int offset;
-};
-
struct gl_vao {
GL *gl;
GLuint vao; // the VAO object, or 0 if unsupported by driver
GLuint buffer; // GL_ARRAY_BUFFER used for the data
int stride; // size of each element (interleaved elements are assumed)
- const struct gl_vao_entry *entries;
+ const struct ra_renderpass_input *entries;
+ int num_entries;
};
void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
- const struct gl_vao_entry *entries);
+ const struct ra_renderpass_input *entries,
+ int num_entries);
void gl_vao_uninit(struct gl_vao *vao);
void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c
index 89820693ab..a656451c2e 100644
--- a/video/out/opengl/osd.c
+++ b/video/out/opengl/osd.c
@@ -22,17 +22,16 @@
#include <libavutil/common.h>
#include "formats.h"
-#include "ra_gl.h"
#include "osd.h"
#define GLSL(x) gl_sc_add(sc, #x "\n");
// glBlendFuncSeparate() arguments
static const int blend_factors[SUBBITMAP_COUNT][4] = {
- [SUBBITMAP_LIBASS] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA,
- GL_ONE, GL_ONE_MINUS_SRC_ALPHA},
- [SUBBITMAP_RGBA] = {GL_ONE, GL_ONE_MINUS_SRC_ALPHA,
- GL_ONE, GL_ONE_MINUS_SRC_ALPHA},
+ [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA,
+ RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA},
+ [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA,
+ RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA},
};
struct vertex {
@@ -41,10 +40,10 @@ struct vertex {
uint8_t ass_color[4];
};
-static const struct gl_vao_entry vertex_vao[] = {
- {"position", 2, GL_FLOAT, false, offsetof(struct vertex, position)},
- {"texcoord" , 2, GL_FLOAT, false, offsetof(struct vertex, texcoord)},
- {"ass_color", 4, GL_UNSIGNED_BYTE, true, offsetof(struct vertex, ass_color)},
+static const struct ra_renderpass_input vertex_vao[] = {
+ {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)},
+ {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)},
+ {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)},
{0}
};
@@ -53,7 +52,6 @@ struct mpgl_osd_part {
int change_id;
struct ra_tex *texture;
int w, h;
- struct gl_pbo_upload pbo;
int num_subparts;
int prev_num_subparts;
struct sub_bitmap *subparts;
@@ -65,7 +63,6 @@ struct mpgl_osd {
struct mp_log *log;
struct osd_state *osd;
struct ra *ra;
- GL *gl;
struct mpgl_osd_part *parts[MAX_OSD_PARTS];
const struct ra_format *fmt_table[SUBBITMAP_COUNT];
bool formats[SUBBITMAP_COUNT];
@@ -79,14 +76,11 @@ struct mpgl_osd {
struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
struct osd_state *osd)
{
- struct ra_gl *ra_gl = ra->priv;
-
struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx);
*ctx = (struct mpgl_osd) {
.log = log,
.osd = osd,
.ra = ra,
- .gl = ra_gl->gl,
.scratch = talloc_zero_size(ctx, 1),
};
@@ -289,9 +283,8 @@ static void get_3d_side_by_side(int stereo_mode, int div[2])
}
void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
- struct gl_shader_cache *sc)
+ struct gl_shader_cache *sc, struct ra_tex *target)
{
- GL *gl = ctx->gl;
struct mpgl_osd_part *part = ctx->parts[index];
int div[2];
@@ -313,20 +306,10 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
}
}
- if (!part->num_vertices)
- return;
-
- gl->Enable(GL_BLEND);
-
const int *factors = &blend_factors[part->format][0];
- gl->BlendFuncSeparate(factors[0], factors[1], factors[2], factors[3]);
-
- ctx->gl->Viewport(0, 0, vp_w, abs(vp_h));
-
- gl_sc_draw_data(sc, GL_TRIANGLES, part->vertices, part->num_vertices);
+ gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]);
- gl->BindTexture(GL_TEXTURE_2D, 0);
- gl->Disable(GL_BLEND);
+ gl_sc_dispatch_draw(sc, target, part->vertices, part->num_vertices);
}
static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode)
diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h
index a0aa104a48..08c143ac0e 100644
--- a/video/out/opengl/osd.h
+++ b/video/out/opengl/osd.h
@@ -5,7 +5,6 @@
#include <inttypes.h>
#include "utils.h"
-#include "gl_utils.h"
#include "shader_cache.h"
#include "sub/osd.h"
@@ -19,7 +18,7 @@ void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mod
bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index,
struct gl_shader_cache *sc);
void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
- struct gl_shader_cache *sc);
+ struct gl_shader_cache *sc, struct ra_tex *target);
int64_t mpgl_get_change_counter(struct mpgl_osd *ctx);
#endif
diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c
index 9096a50148..e67dd6ebdf 100644
--- a/video/out/opengl/ra.c
+++ b/video/out/opengl/ra.c
@@ -16,6 +16,51 @@ void ra_tex_free(struct ra *ra, struct ra_tex **tex)
*tex = NULL;
}
+static size_t vartype_size(enum ra_vartype type)
+{
+ switch (type) {
+ case RA_VARTYPE_INT: return sizeof(int);
+ case RA_VARTYPE_FLOAT: return sizeof(float);
+ case RA_VARTYPE_BYTE_UNORM: return 1;
+ default: return 0;
+ }
+}
+
+// Return the size of the data ra_renderpass_input_val.data is going to point
+// to. This returns 0 for non-primitive types such as textures.
+size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input)
+{
+ size_t el_size = vartype_size(input->type);
+ return el_size * input->dim_v * input->dim_m;
+}
+
+static struct ra_renderpass_input *dup_inputs(void *ta_parent,
+ const struct ra_renderpass_input *inputs, int num_inputs)
+{
+ struct ra_renderpass_input *res =
+ talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0]));
+ for (int n = 0; n < num_inputs; n++)
+ res[n].name = talloc_strdup(res, res[n].name);
+ return res;
+}
+
+// Return a newly allocated deep-copy of params.
+struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent,
+ const struct ra_renderpass_params *params)
+{
+ struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res);
+ *res = *params;
+ res->inputs = dup_inputs(res, res->inputs, res->num_inputs);
+ res->vertex_attribs =
+ dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs);
+ res->cached_program = bstrdup(res, res->cached_program);
+ res->vertex_shader = talloc_strdup(res, res->vertex_shader);
+ res->frag_shader = talloc_strdup(res, res->frag_shader);
+ res->compute_shader = talloc_strdup(res, res->compute_shader);
+ return res;
+};
+
+
// Return whether this is a tightly packed format with no external padding and
// with the same bit size/depth in all components.
static bool ra_format_is_regular(const struct ra_format *fmt)
diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index ab3027b78a..75ba0e5fe7 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -1,12 +1,16 @@
#pragma once
#include "common/common.h"
+#include "misc/bstr.h"
// Handle for a rendering API backend.
struct ra {
struct ra_fns *fns;
void *priv;
+ int glsl_version; // GLSL version (e.g. 300 => 3.0)
+ bool glsl_es; // use ES dialect
+
struct mp_log *log;
// RA_CAP_* bit field. The RA backend must set supported features at init
@@ -31,6 +35,9 @@ enum {
RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader source textures)
RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader source textures)
RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit
+ RA_CAP_COMPUTE = 1 << 3, // supports compute shaders
+ RA_CAP_PBO = 1 << 4, // supports ra.use_pbo
+ RA_CAP_NESTED_ARRAY = 1 << 5,
};
enum ra_ctype {
@@ -81,6 +88,7 @@ struct ra_tex_params {
// if true, repeat texture coordinates
bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy
// always set to false, except in OSX code
+ bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy
// If non-NULL, the texture will be created with these contents, and is
// considered immutable afterwards (no upload, mapping, or rendering to it).
void *initial_data;
@@ -108,6 +116,149 @@ struct ra_mapped_buffer {
size_t size; // total size of the mapping, starting at data
};
+// Type of a shader uniform variable, or a vertex attribute. In all cases,
+// vectors are matrices are done by having more than 1 value.
+enum ra_vartype {
+ RA_VARTYPE_INVALID,
+ RA_VARTYPE_INT, // C: int, GLSL: int, ivec*
+ RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat*
+ RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types
+ // ra_tex.params.render_src must be true
+ RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types
+ // write-only (W) image for compute shaders
+ RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only)
+ RA_VARTYPE_SSBO, // a hack for GL
+};
+
+// Represents a uniform, texture input parameter, and similar things.
+struct ra_renderpass_input {
+ const char *name; // name as used in the shader
+ enum ra_vartype type;
+ // The total number of values is given by dim_v * dim_m.
+ int dim_v; // vector dimension (1 for non-vector and non-matrix)
+ int dim_m; // additional matrix dimension (dim_v x dim_m)
+ // Vertex data: byte offset of the attribute into the vertex struct
+ // RA_VARTYPE_TEX: texture unit
+ // RA_VARTYPE_IMG_W: image unit
+ // RA_VARTYPE_SSBO: whatever?
+ // Other uniforms: unused
+ int binding;
+};
+
+size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input);
+
+enum ra_blend {
+ RA_BLEND_ZERO,
+ RA_BLEND_ONE,
+ RA_BLEND_SRC_ALPHA,
+ RA_BLEND_ONE_MINUS_SRC_ALPHA,
+};
+
+enum ra_renderpass_type {
+ RA_RENDERPASS_TYPE_INVALID,
+ RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader
+ RA_RENDERPASS_TYPE_COMPUTE, // compute shader
+};
+
+// Static part of a rendering pass. It conflates the following:
+// - compiled shader and its list of uniforms
+// - vertex attributes and its shader mappings
+// - blending parameters
+// (For Vulkan, this would be shader module + pipeline state.)
+// Upon creation, the values of dynamic values such as uniform contents (whose
+// initial values are not provided here) are required to be 0.
+struct ra_renderpass_params {
+ enum ra_renderpass_type type;
+
+ // Uniforms, including texture/sampler inputs.
+ struct ra_renderpass_input *inputs;
+ int num_inputs;
+
+ // Highly implementation-specific byte array storing a compiled version
+ // of the program. Can be used to speed up shader compilation. A backend
+ // xan read this in renderpass_create, or set this on the newly created
+ // ra_renderpass params field.
+ bstr cached_program;
+
+ // --- type==RA_RENDERPASS_TYPE_RASTER only
+
+ // Describes the format of the vertex data.
+ struct ra_renderpass_input *vertex_attribs;
+ int num_vertex_attribs;
+ int vertex_stride;
+
+ // Shader text, in GLSL. (Yes, you need a GLSL compiler.)
+ // These are complete shaders, including prelude and declarations.
+ const char *vertex_shader;
+ const char *frag_shader;
+
+ // Target blending mode. If enable_blend is false, the blend_ fields can
+ // be ignored.
+ bool enable_blend;
+ enum ra_blend blend_src_rgb;
+ enum ra_blend blend_dst_rgb;
+ enum ra_blend blend_src_alpha;
+ enum ra_blend blend_dst_alpha;
+
+ // --- type==RA_RENDERPASS_TYPE_COMPUTE only
+
+ // Shader text, like vertex_shader/frag_shader.
+ const char *compute_shader;
+};
+
+struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent,
+ const struct ra_renderpass_params *params);
+
+// Conflates the following typical GPU API concepts:
+// - various kinds of shaders
+// - rendering pipelines
+// - descriptor sets, uniforms, other bindings
+// - all synchronization necessary
+// - the current values of all uniforms (this one makes it relatively stateful
+// from an API perspective)
+struct ra_renderpass {
+ // All fields are read-only after creation.
+ struct ra_renderpass_params params;
+ void *priv;
+};
+
+// An input value (see ra_renderpass_input).
+struct ra_renderpass_input_val {
+ int index; // index into ra_renderpass_params.inputs[]
+ void *data; // pointer to data according to ra_renderpass_input
+ // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9])
+};
+
+// Parameters for performing a rendering pass (basically the dynamic params).
+// These change potentially every time.
+struct ra_renderpass_run_params {
+ struct ra_renderpass *pass;
+
+ // Generally this lists parameters only which changed since the last
+ // invocation and need to be updated. The ra_renderpass instance is
+ // supposed to keep unchanged values from the previous run.
+ // For non-primitive types like textures, these entries are always added,
+ // even if they do not change.
+ struct ra_renderpass_input_val *values;
+ int num_values;
+
+ // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
+
+ // target->params.render_dst must be true.
+ struct ra_tex *target;
+ struct mp_rect viewport;
+ struct mp_rect scissors;
+
+ // (The primitive type is always a triangle list.)
+ void *vertex_data;
+ int vertex_count; // number of vertex elements, not bytes
+
+ // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only
+
+ // Number of work groups to be run in X/Y/Z dimensions.
+ int compute_groups[3];
+};
+
enum {
// Flags for the texture_upload flags parameter.
RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region
@@ -183,6 +334,19 @@ struct ra_fns {
// not be called, even if it's non-NULL).
void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
int dst_x, int dst_y, struct mp_rect *src_rc);
+
+ // Compile a shader and create a pipeline. This is a rare operation.
+ // The params pointer and anything it points to must stay valid until
+ // renderpass_destroy.
+ struct ra_renderpass *(*renderpass_create)(struct ra *ra,
+ const struct ra_renderpass_params *params);
+
+ void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass);
+
+ // Perform a render pass, basically drawing a list of triangles to a FBO.
+ // This is an extremely common operation.
+ void (*renderpass_run)(struct ra *ra,
+ const struct ra_renderpass_run_params *params);
};
struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params);
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e1ecd337e4..7d52063295 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -1,5 +1,6 @@
-#include "formats.h"
+#include <libavutil/intreadwrite.h>
+#include "formats.h"
#include "ra_gl.h"
static struct ra_fns ra_fns_gl;
@@ -22,6 +23,14 @@ int ra_init_gl(struct ra *ra, GL *gl)
ra->caps |= RA_CAP_TEX_3D;
if (gl->BlitFramebuffer)
ra->caps |= RA_CAP_BLIT;
+ if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER)
+ ra->caps |= RA_CAP_COMPUTE;
+ if (gl->MapBufferRange)
+ ra->caps |= RA_CAP_PBO;
+ if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY)
+ ra->caps |= RA_CAP_NESTED_ARRAY;
+ ra->glsl_version = gl->glsl_version;
+ ra->glsl_es = gl->es > 0;
int gl_fmt_features = gl_format_feature_flags(gl);
@@ -271,6 +280,7 @@ static struct ra_tex *wrap_tex_fbo(struct ra *ra, GLuint gl_obj, bool is_fbo,
.render_dst = is_fbo,
.render_src = !is_fbo,
.non_normalized = gl_target == GL_TEXTURE_RECTANGLE,
+ .external_oes = gl_target == GL_TEXTURE_EXTERNAL_OES,
},
};
@@ -310,6 +320,12 @@ struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h)
return wrap_tex_fbo(ra, gl_fbo, true, 0, GL_RGBA, 0, 0, w, h);
}
+GL *ra_gl_get(struct ra *ra)
+{
+ struct ra_gl *p = ra->priv;
+ return p->gl;
+}
+
static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,
const void *src, ptrdiff_t stride,
struct mp_rect *rc, uint64_t flags,
@@ -478,6 +494,367 @@ static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
}
+static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ struct ra_gl *p = ra->priv;
+ struct ra_renderpass_gl *pass_gl = pass->priv;
+ p->gl->DeleteProgram(pass_gl->program);
+ gl_vao_uninit(&pass_gl->vao);
+
+ talloc_free(pass_gl);
+ talloc_free(pass);
+}
+
+static const char *shader_typestr(GLenum type)
+{
+ switch (type) {
+ case GL_VERTEX_SHADER: return "vertex";
+ case GL_FRAGMENT_SHADER: return "fragment";
+ case GL_COMPUTE_SHADER: return "compute";
+ default: abort();
+ }
+}
+
+static void compile_attach_shader(struct ra *ra, GLuint program,
+ GLenum type, const char *source, bool *ok)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+
+ GLuint shader = gl->CreateShader(type);
+ gl->ShaderSource(shader, 1, &source, NULL);
+ gl->CompileShader(shader);
+ GLint status = 0;
+ gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status);
+ GLint log_length = 0;
+ gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+
+ int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
+ const char *typestr = shader_typestr(type);
+ if (mp_msg_test(ra->log, pri)) {
+ MP_MSG(ra, pri, "%s shader source:\n", typestr);
+ mp_log_source(ra->log, pri, source);
+ }
+ if (log_length > 1) {
+ GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
+ gl->GetShaderInfoLog(shader, log_length, NULL, logstr);
+ MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n",
+ typestr, status, logstr);
+ talloc_free(logstr);
+ }
+ if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) {
+ GLint len = 0;
+ gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len);
+ if (len > 0) {
+ GLchar *sstr = talloc_zero_size(NULL, len + 1);
+ gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr);
+ MP_DBG(ra, "Translated shader:\n");
+ mp_log_source(ra->log, MSGL_DEBUG, sstr);
+ }
+ }
+
+ gl->AttachShader(program, shader);
+ gl->DeleteShader(shader);
+
+ *ok &= status;
+}
+
+static void link_shader(struct ra *ra, GLuint program, bool *ok)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+
+ gl->LinkProgram(program);
+ GLint status = 0;
+ gl->GetProgramiv(program, GL_LINK_STATUS, &status);
+ GLint log_length = 0;
+ gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
+
+ int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
+ if (mp_msg_test(ra->log, pri)) {
+ GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
+ gl->GetProgramInfoLog(program, log_length, NULL, logstr);
+ MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr);
+ talloc_free(logstr);
+ }
+
+ *ok &= status;
+}
+
+// either 'compute' or both 'vertex' and 'frag' are needed
+static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p)
+{
+ struct ra_gl *priv = ra->priv;
+ GL *gl = priv->gl;
+
+ GLuint prog = gl->CreateProgram();
+ bool ok = true;
+ if (p->type == RA_RENDERPASS_TYPE_COMPUTE)
+ compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok);
+ if (p->type == RA_RENDERPASS_TYPE_RASTER) {
+ compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok);
+ compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok);
+ for (int n = 0; n < p->num_vertex_attribs; n++)
+ gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name);
+ }
+ link_shader(ra, prog, &ok);
+ if (!ok) {
+ gl->DeleteProgram(prog);
+ prog = 0;
+ }
+ return prog;
+}
+
+static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p,
+ bstr *out_cached_data)
+{
+ struct ra_gl *priv = ra->priv;
+ GL *gl = priv->gl;
+
+ GLuint prog = 0;
+
+ if (gl->ProgramBinary && p->cached_program.len > 4) {
+ GLenum format = AV_RL32(p->cached_program.start);
+ prog = gl->CreateProgram();
+ gl_check_error(gl, ra->log, "before loading program");
+ gl->ProgramBinary(prog, format, p->cached_program.start + 4,
+ p->cached_program.len - 4);
+ gl->GetError(); // discard potential useless error
+ GLint status = 0;
+ gl->GetProgramiv(prog, GL_LINK_STATUS, &status);
+ if (status) {
+ MP_VERBOSE(ra, "Loading binary program succeeded.\n");
+ } else {
+ gl->DeleteProgram(prog);
+ prog = 0;
+ }
+ }
+
+ if (!prog) {
+ prog = compile_program(ra, p);
+
+ if (gl->GetProgramBinary && prog) {
+ GLint size = 0;
+ gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
+ uint8_t *buffer = talloc_size(NULL, size + 4);
+ GLsizei actual_size = 0;
+ GLenum binary_format = 0;
+ gl->GetProgramBinary(prog, size, &actual_size, &binary_format,
+ buffer + 4);
+ AV_WL32(buffer, binary_format);
+ if (actual_size)
+ *out_cached_data = (bstr){buffer, actual_size + 4};
+ }
+ }
+
+ return prog;
+}
+
+static struct ra_renderpass *gl_renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_render_pass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ struct ra_renderpass_gl *pass_gl = pass->priv =
+ talloc_zero(NULL, struct ra_renderpass_gl);
+
+ bstr cached = {0};
+ pass_gl->program = load_program(ra, params, &cached);
+ if (!pass_gl->program) {
+ gl_renderpass_destroy(ra, pass);
+ return NULL;
+ }
+
+ talloc_steal(pass, cached.start);
+ pass->params.cached_program = cached;
+
+ for (int n = 0; n < params->num_inputs; n++) {
+ GLint loc =
+ gl->GetUniformLocation(pass_gl->program, params->inputs[n].name);
+ MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc,
+ loc);
+ }
+
+ gl_vao_init(&pass_gl->vao, gl, params->vertex_stride, params->vertex_attribs,
+ params->num_vertex_attribs);
+
+ pass_gl->first_run = true;
+
+ return pass;
+}
+
+static GLenum map_blend(enum ra_blend blend)
+{
+ switch (blend) {
+ case RA_BLEND_ZERO: return GL_ZERO;
+ case RA_BLEND_ONE: return GL_ONE;
+ case RA_BLEND_SRC_ALPHA: return GL_SRC_ALPHA;
+ case RA_BLEND_ONE_MINUS_SRC_ALPHA: return GL_ONE_MINUS_SRC_ALPHA;
+ default: return 0;
+ }
+}
+
+// Assumes program is current (gl->UseProgram(program)).
+static void update_uniform(struct ra *ra, struct ra_renderpass *pass,
+ struct ra_renderpass_input_val *val)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+ struct ra_renderpass_gl *pass_gl = pass->priv;
+
+ struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+ assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc);
+ GLint loc = pass_gl->uniform_loc[val->index];
+
+ switch (input->type) {
+ case RA_VARTYPE_INT: {
+ assert(input->dim_v * input->dim_m == 1);
+ if (loc < 0)
+ break;
+ gl->Uniform1i(loc, *(int *)val->data);
+ break;
+ }
+ case RA_VARTYPE_FLOAT: {
+ float *f = val->data;
+ if (loc < 0)
+ break;
+ if (input->dim_m == 1) {
+ switch (input->dim_v) {
+ case 1: gl->Uniform1f(loc, f[0]); break;
+ case 2: gl->Uniform2f(loc, f[0], f[1]); break;
+ case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break;
+ case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break;
+ default: abort();
+ }
+ } else if (input->dim_v == 2 && input->dim_m == 2) {
+ gl->UniformMatrix2fv(loc, 1, GL_FALSE, f);
+ } else if (input->dim_v == 3 && input->dim_m == 3) {
+ gl->UniformMatrix3fv(loc, 1, GL_FALSE, f);
+ } else {
+ abort();
+ }
+ break;
+ }
+ case RA_VARTYPE_IMG_W: /* fall through */
+ case RA_VARTYPE_TEX: {
+ struct ra_tex *tex = *(struct ra_tex **)val->data;
+ struct ra_tex_gl *tex_gl = tex->priv;
+ assert(tex->params.render_src);
+ if (pass_gl->first_run)
+ gl->Uniform1i(loc, input->binding);
+ if (input->type == RA_VARTYPE_TEX) {
+ gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+ gl->BindTexture(tex_gl->target, tex_gl->texture);
+ } else {
+ gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0,
+ GL_WRITE_ONLY, tex_gl->internal_format);
+ }
+ break;
+ }
+ case RA_VARTYPE_SSBO: {
+ gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding,
+ *(int *)val->data);
+ break;
+ }
+ default:
+ abort();
+ }
+}
+
+static void disable_binding(struct ra *ra, struct ra_renderpass *pass,
+ struct ra_renderpass_input_val *val)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+
+ struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+
+ switch (input->type) {
+ case RA_VARTYPE_IMG_W: /* fall through */
+ case RA_VARTYPE_TEX: {
+ struct ra_tex *tex = *(struct ra_tex **)val->data;
+ struct ra_tex_gl *tex_gl = tex->priv;
+ assert(tex->params.render_src);
+ if (input->type == RA_VARTYPE_TEX) {
+ gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+ gl->BindTexture(tex_gl->target, 0);
+ } else {
+ gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0,
+ GL_WRITE_ONLY, tex_gl->internal_format);
+ }
+ break;
+ }
+ case RA_VARTYPE_SSBO: {
+ gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0);
+ break;
+ }
+ }
+}
+
+static void gl_renderpass_run(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct ra_gl *p = ra->priv;
+ GL *gl = p->gl;
+ struct ra_renderpass *pass = params->pass;
+ struct ra_renderpass_gl *pass_gl = pass->priv;
+
+ gl->UseProgram(pass_gl->program);
+
+ for (int n = 0; n < params->num_values; n++)
+ update_uniform(ra, pass, &params->values[n]);
+ gl->ActiveTexture(GL_TEXTURE0);
+
+ switch (pass->params.type) {
+ case RA_RENDERPASS_TYPE_RASTER: {
+ struct ra_tex_gl *target_gl = params->target->priv;
+ assert(params->target->params.render_dst);
+ gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo);
+ gl->Viewport(params->viewport.x0, params->viewport.y0,
+ mp_rect_w(params->viewport),
+ mp_rect_h(params->viewport));
+ gl->Scissor(params->scissors.x0, params->scissors.y0,
+ mp_rect_w(params->scissors),
+ mp_rect_h(params->scissors));
+ gl->Enable(GL_SCISSOR_TEST);
+ if (pass->params.enable_blend) {
+ gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb),
+ map_blend(pass->params.blend_dst_rgb),
+ map_blend(pass->params.blend_src_alpha),
+ map_blend(pass->params.blend_dst_alpha));
+ gl->Enable(GL_BLEND);
+ }
+ gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data,
+ params->vertex_count);
+ gl->Disable(GL_SCISSOR_TEST);
+ gl->Disable(GL_BLEND);
+ gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+ break;
+ }
+ case R