summaryrefslogtreecommitdiffstats
path: root/video/out/gpu/video.c
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-09-28 00:07:42 +0200
committerNiklas Haas <git@haasn.xyz>2017-09-28 01:54:38 +0200
commit67fd5882b86bc544e27a9cb0b188d63bdb950623 (patch)
tree7b0cd41803e8432cf2a5124c85cdf96cf73382a9 /video/out/gpu/video.c
parent002a0ce23200c7044ce744555223e4b884350436 (diff)
downloadmpv-67fd5882b86bc544e27a9cb0b188d63bdb950623.tar.bz2
mpv-67fd5882b86bc544e27a9cb0b188d63bdb950623.tar.xz
vo_gpu: make the vertex attribs dynamic
This has several advantages: 1. no more redundant texcoords when we don't need them 2. no more arbitrary limit on how many textures we can bind 3. (that extends to user shaders as well) 4. no more arbitrary limits on tscale radius To realize this, the VAO was moved from a hacky stateful approach (gl_sc_set_vertex_attribs) - which always bothered me since it was required for compute shaders as well even though they ignored it - to be a proper parameter of gl_sc_dispatch_draw, and internally plumbed into gl_sc_generate, which will make a (properly mangled) deep copy into params.vertex_attribs.
Diffstat (limited to 'video/out/gpu/video.c')
-rw-r--r--video/out/gpu/video.c124
1 files changed, 71 insertions, 53 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 9cececf866..fd0ac25bbf 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -60,28 +60,12 @@ static const char *const fixed_tscale_filters[] = {
// must be sorted, and terminated with 0
int filter_sizes[] =
{2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0};
-int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM
+int tscale_sizes[] = {2, 4, 6, 8, 0};
struct vertex_pt {
float x, y;
};
-struct vertex {
- struct vertex_pt position;
- struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM];
-};
-
-static const struct ra_renderpass_input vertex_vao[] = {
- {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)},
- {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])},
- {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])},
- {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])},
- {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[3])},
- {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])},
- {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])},
- {0}
-};
-
struct texplane {
struct ra_tex *tex;
int w, h;
@@ -213,6 +197,13 @@ struct gl_video {
bool dumb_mode;
bool forced_dumb_mode;
+ // Cached vertex array, to avoid re-allocation per frame. For simplicity,
+ // our vertex format is simply a list of `vertex_pt`s, since this greatly
+ // simplifies offset calculation at the cost of (unneeded) flexibility.
+ struct vertex_pt *tmp_vertex;
+ struct ra_renderpass_input *vao;
+ int vao_len;
+
const struct ra_format *fbo_format;
struct ra_tex *merge_tex[4];
struct ra_tex *scale_tex[4];
@@ -252,8 +243,8 @@ struct gl_video {
// temporary during rendering
struct compute_info pass_compute; // compute shader metadata for this pass
- struct image pass_img[TEXUNIT_VIDEO_NUM]; // bound images for this pass
- int pass_img_num;
+ struct image *pass_imgs; // bound images for this pass
+ int num_pass_imgs;
struct saved_img *saved_imgs; // saved (named) images for this frame
int num_saved_imgs;
@@ -631,13 +622,12 @@ static struct image image_wrap(struct ra_tex *tex, enum plane_type type,
};
}
-// Bind an image to a free texture unit and return its ID. At most
-// TEXUNIT_VIDEO_NUM texture units can be bound at once
+// Bind an image to a free texture unit and return its ID.
static int pass_bind(struct gl_video *p, struct image img)
{
- assert(p->pass_img_num < TEXUNIT_VIDEO_NUM);
- p->pass_img[p->pass_img_num] = img;
- return p->pass_img_num++;
+ int idx = p->num_pass_imgs;
+ MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img);
+ return idx;
}
// Rotation by 90° and flipping.
@@ -1062,8 +1052,8 @@ static void pass_prepare_src_tex(struct gl_video *p)
{
struct gl_shader_cache *sc = p->sc;
- for (int n = 0; n < p->pass_img_num; n++) {
- struct image *s = &p->pass_img[n];
+ for (int n = 0; n < p->num_pass_imgs; n++) {
+ struct image *s = &p->pass_imgs[n];
if (!s->tex)
continue;
@@ -1087,6 +1077,11 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
}
+static void cleanup_binds(struct gl_video *p)
+{
+ p->num_pass_imgs = 0;
+}
+
// Sets the appropriate compute shader metadata for an implicit compute pass
// bw/bh: block size
static void pass_is_compute(struct gl_video *p, int bw, int bh)
@@ -1098,12 +1093,6 @@ static void pass_is_compute(struct gl_video *p, int bw, int bh)
};
}
-static void cleanup_binds(struct gl_video *p)
-{
- memset(&p->pass_img, 0, sizeof(p->pass_img));
- p->pass_img_num = 0;
-}
-
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target target that needs to be written, or the source texture that needs to
// be reduced)
@@ -1115,7 +1104,6 @@ static void dispatch_compute(struct gl_video *p, int w, int h,
info.threads_h > 0 ? info.threads_h : info.block_h);
pass_prepare_src_tex(p);
- gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
// Since we don't actually have vertices, we pretend for convenience
// reasons that we do and calculate the right texture coordinates based on
@@ -1123,14 +1111,13 @@ static void dispatch_compute(struct gl_video *p, int w, int h,
gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h });
PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");
- for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
- struct image *s = &p->pass_img[n];
+ for (int n = 0; n < p->num_pass_imgs; n++) {
+ struct image *s = &p->pass_imgs[n];
if (!s->tex)
continue;
// We need to rescale the coordinates to the true texture size
- char tex_scale[32];
- snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n);
+ char *tex_scale = mp_tprintf(32, "tex_scale%d", n);
gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){
(float)s->w / s->tex->params.w,
(float)s->h / s->tex->params.h,
@@ -1155,7 +1142,24 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p,
struct ra_fbo fbo,
const struct mp_rect *dst)
{
- struct vertex va[6] = {0};
+ // The first element is reserved for `vec2 position`
+ int num_vertex_attribs = 1 + p->num_pass_imgs;
+ size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt);
+
+ // Expand the VAO if necessary
+ while (p->vao_len < num_vertex_attribs) {
+ MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
+ .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1),
+ .type = RA_VARTYPE_FLOAT,
+ .dim_v = 2,
+ .dim_m = 1,
+ .offset = p->vao_len * sizeof(struct vertex_pt),
+ });
+ }
+
+ int num_vertices = 6; // quad as triangle list
+ int num_attribs_total = num_vertices * num_vertex_attribs;
+ MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total);
struct gl_transform t;
gl_transform_ortho_fbo(&t, fbo);
@@ -1166,11 +1170,12 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p,
gl_transform_vec(t, &x[1], &y[1]);
for (int n = 0; n < 4; n++) {
- struct vertex *v = &va[n];
- v->position.x = x[n / 2];
- v->position.y = y[n % 2];
- for (int i = 0; i < p->pass_img_num; i++) {
- struct image *s = &p->pass_img[i];
+ struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n];
+ // vec2 position in idx 0
+ vs[0].x = x[n / 2];
+ vs[0].y = y[n % 2];
+ for (int i = 0; i < p->num_pass_imgs; i++) {
+ struct image *s = &p->pass_imgs[i];
if (!s->tex)
continue;
struct gl_transform tr = s->transform;
@@ -1178,22 +1183,28 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p,
float ty = (n % 2) * s->h;
gl_transform_vec(tr, &tx, &ty);
bool rect = s->tex->params.non_normalized;
- v->texcoord[i].x = tx / (rect ? 1 : s->tex->params.w);
- v->texcoord[i].y = ty / (rect ? 1 : s->tex->params.h);
+ // vec2 texcoordN in idx N+1
+ vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w);
+ vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h);
}
}
- va[4] = va[2];
- va[5] = va[1];
+ memmove(&p->tmp_vertex[num_vertex_attribs * 4],
+ &p->tmp_vertex[num_vertex_attribs * 2],
+ vertex_stride);
- return gl_sc_dispatch_draw(p->sc, fbo.tex, va, 6);
+ memmove(&p->tmp_vertex[num_vertex_attribs * 5],
+ &p->tmp_vertex[num_vertex_attribs * 1],
+ vertex_stride);
+
+ return gl_sc_dispatch_draw(p->sc, fbo.tex, p->vao, p->vao_len, vertex_stride,
+ p->tmp_vertex, num_vertices);
}
static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo,
const struct mp_rect *dst)
{
pass_prepare_src_tex(p);
- gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
pass_record(p, render_pass_quad(p, fbo, dst));
debug_check_gl(p, "after rendering");
cleanup_binds(p);
@@ -1340,7 +1351,7 @@ static void saved_img_store(struct gl_video *p, const char *name,
static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
struct image img, struct tex_hook *hook)
{
- for (int t = 0; t < TEXUNIT_VIDEO_NUM; t++) {
+ for (int t = 0; t < SHADER_MAX_BINDS; t++) {
char *bind_name = (char *)hook->bind_tex[t];
if (!bind_name)
@@ -1370,7 +1381,7 @@ static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
// Clean up texture bindings and move on to the next hook
MP_DBG(p, "Skipping hook on %s due to no texture named %s.\n",
name, bind_name);
- p->pass_img_num -= t;
+ p->num_pass_imgs -= t;
return false;
}
@@ -1481,7 +1492,7 @@ static void pass_opt_hook_point(struct gl_video *p, const char *name,
goto found;
}
- for (int b = 0; b < TEXUNIT_VIDEO_NUM; b++) {
+ for (int b = 0; b < SHADER_MAX_BINDS; b++) {
if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0)
goto found;
}
@@ -2855,7 +2866,6 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
} else {
assert(tscale->kernel && !tscale->kernel->polar);
size = ceil(tscale->kernel->size);
- assert(size <= TEXUNIT_VIDEO_NUM);
}
int radius = size/2;
@@ -3580,6 +3590,14 @@ struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log,
p->opts = *opts;
for (int n = 0; n < SCALER_COUNT; n++)
p->scaler[n] = (struct scaler){.index = n};
+ // our VAO always has the vec2 position as the first element
+ MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
+ .name = "position",
+ .type = RA_VARTYPE_FLOAT,
+ .dim_v = 2,
+ .dim_m = 1,
+ .offset = 0,
+ });
init_gl(p);
reinit_from_options(p);
return p;