summaryrefslogtreecommitdiffstats
path: root/video/out
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-09-08 06:13:55 +0200
committerNiklas Haas <git@haasn.xyz>2017-09-26 17:25:35 +0200
commitca85a153b4a201c7f6d600f861639ef68c1edfa3 (patch)
tree328594163e35181d10396e651c78f393cff38530 /video/out
parented345ffc2f3373743d74a5e0a1dc73c012389273 (diff)
downloadmpv-ca85a153b4a201c7f6d600f861639ef68c1edfa3.tar.bz2
mpv-ca85a153b4a201c7f6d600f861639ef68c1edfa3.tar.xz
vo_gpu: vulkan: add support for push constants
Can in theory avoid updating the uniform buffer every frame
Diffstat (limited to 'video/out')
-rw-r--r--video/out/gpu/ra.h9
-rw-r--r--video/out/gpu/shader_cache.c113
-rw-r--r--video/out/vulkan/ra_vk.c15
3 files changed, 112 insertions, 25 deletions
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index 7a2fa0e11c..15fa782bdd 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -26,6 +26,9 @@ struct ra {
// time.
size_t max_shmem;
+ // Maximum push constant size. Set by the RA backend at init time.
+ size_t max_pushc_size;
+
// Set of supported texture formats. Must be added by RA backend at init time.
// If there are equivalent formats with different caveats, the preferred
// formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
@@ -245,6 +248,7 @@ struct ra_renderpass_params {
// Uniforms, including texture/sampler inputs.
struct ra_renderpass_input *inputs;
int num_inputs;
+ size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4
// Highly implementation-specific byte array storing a compiled version
// of the program. Can be used to speed up shader compilation. A backend
@@ -317,6 +321,7 @@ struct ra_renderpass_run_params {
// even if they do not change.
struct ra_renderpass_input_val *values;
int num_values;
+ void *push_constants; // must be set if params.push_constants_size > 0
// --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
@@ -387,6 +392,10 @@ struct ra_fns {
// but must be implemented if RA_CAP_BUF_RO is supported.
struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp);
+ // Returns the layout requirements of a push constant element. Optional,
+ // but must be implemented if ra.max_pushc_size > 0.
+ struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp);
+
// Clear the dst with the given color (rgba) and within the given scissor.
// dst must have dst->params.render_dst==true. Content outside of the
// scissor is preserved.
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c
index 28490fda2f..ff386546d3 100644
--- a/video/out/gpu/shader_cache.c
+++ b/video/out/gpu/shader_cache.c
@@ -29,6 +29,7 @@ union uniform_val {
enum sc_uniform_type {
SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO)
+ SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size)
};
struct sc_uniform {
@@ -37,7 +38,7 @@ struct sc_uniform {
const char *glsl_type;
union uniform_val v;
char *buffer_format;
- // for SC_UNIFORM_TYPE_UBO:
+ // for SC_UNIFORM_TYPE_UBO/PUSHC:
struct ra_layout layout;
size_t offset; // byte offset within the buffer
};
@@ -56,6 +57,7 @@ struct sc_entry {
struct timer_pool *timer;
struct ra_buf *ubo;
int ubo_index; // for ra_renderpass_input_val.index
+ void *pushc;
};
struct gl_shader_cache {
@@ -87,6 +89,7 @@ struct gl_shader_cache {
int ubo_binding;
size_t ubo_size;
+ size_t pushc_size;
struct ra_renderpass_input_val *values;
int num_values;
@@ -129,6 +132,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
sc->num_uniforms = 0;
sc->ubo_binding = 0;
sc->ubo_size = 0;
+ sc->pushc_size = 0;
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
sc->next_binding[i] = 0;
sc->current_shader = NULL;
@@ -255,25 +259,45 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
}
}
-// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input
-// is already set. Also updates sc_uniform->type.
-static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
-{
- if (!(sc->ra->caps & RA_CAP_BUF_RO))
- return;
+// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
+// and glsl_type/buffer_format are already set.
+static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
+{
+ // Avoid push constants for "large" values like matrices, since these
+ // are likely to exceed both the VGPR budget and the push constant size
+ // budget
+ bool try_pushc = u->input.dim_m == 1;
+
+ // Attempt using push constants first
+ if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
+ struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
+ size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
+ // Push constants have limited size, so make sure we don't exceed this
+ size_t new_size = offset + layout.size;
+ if (new_size <= sc->ra->max_pushc_size) {
+ u->type = SC_UNIFORM_TYPE_PUSHC;
+ u->layout = layout;
+ u->offset = offset;
+ sc->pushc_size = new_size;
+ return;
+ }
+ }
- // Using UBOs with explicit layout(offset) like we do requires GLSL version
- // 440 or higher. In theory the UBO code can also use older versions, but
- // just try and avoid potential headaches. This also ensures they're only
- // used on drivers that are probably modern enough to actually support them
- // correctly.
- if (sc->ra->glsl_version < 440)
+ // Attempt using a uniform buffer next. The GLSL version 440 check is due
+ // to explicit offsets on UBO entries. In theory we could omit the
+ // offsets and support UBOs for older GL as well, but this is a nice
+ // safety net for driver bugs (and also rules out potentially buggy drivers)
+ if (sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
+ u->type = SC_UNIFORM_TYPE_UBO;
+ u->layout = sc->ra->fns->uniform_layout(&u->input);
+ u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
+ sc->ubo_size = u->offset + u->layout.size;
return;
+ }
- u->type = SC_UNIFORM_TYPE_UBO;
- u->layout = sc->ra->fns->uniform_layout(&u->input);
- u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
- sc->ubo_size = u->offset + u->layout.size;
+ // If all else fails, use global uniforms
+ assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
+ u->type = SC_UNIFORM_TYPE_GLOBAL;
}
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
@@ -334,7 +358,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_FLOAT;
u->glsl_type = "float";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
u->v.f[0] = f;
}
@@ -343,7 +367,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_INT;
u->glsl_type = "int";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
u->v.i[0] = i;
}
@@ -353,7 +377,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 2;
u->glsl_type = "vec2";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
}
@@ -364,7 +388,7 @@ void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 3;
u->glsl_type = "vec3";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
u->v.f[2] = f[2];
@@ -383,7 +407,7 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 2;
u->input.dim_m = 2;
u->glsl_type = "mat2";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
for (int n = 0; n < 4; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -405,7 +429,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 3;
u->input.dim_m = 3;
u->glsl_type = "mat3";
- update_ubo_params(sc, u);
+ update_uniform_params(sc, u);
for (int n = 0; n < 9; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -465,6 +489,20 @@ static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
}
}
+static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
+{
+ uintptr_t src = (uintptr_t) &u->v;
+ uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset;
+ struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
+ struct ra_layout dst_layout = u->layout;
+
+ for (int i = 0; i < u->input.dim_m; i++) {
+ memcpy((void *)dst, (void *)src, src_layout.stride);
+ src += src_layout.stride;
+ dst += dst_layout.stride;
+ }
+}
+
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
struct sc_uniform *u, int n)
{
@@ -489,6 +527,10 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
assert(e->ubo);
update_ubo(sc->ra, e->ubo, u);
break;
+ case SC_UNIFORM_TYPE_PUSHC:
+ assert(e->pushc);
+ update_pushc(sc->ra, e->pushc, u);
+ break;
default: abort();
}
}
@@ -571,6 +613,11 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
}
+ if (sc->pushc_size) {
+ params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
+ entry->pushc = talloc_zero_size(entry, params.push_constants_size);
+ }
+
if (sc->ubo_size) {
struct ra_buf_params ubo_params = {
.type = RA_BUF_TYPE_UNIFORM,
@@ -623,8 +670,22 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
struct sc_uniform *u = &sc->uniforms[n];
if (u->type != SC_UNIFORM_TYPE_UBO)
continue;
- ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset,
- u->glsl_type, u->input.name);
+ ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
+ u->input.name);
+ }
+ ADD(dst, "};\n");
+ }
+
+ // Ditto for push constants
+ if (sc->pushc_size > 0) {
+ ADD(dst, "layout(push_constant) uniform PushC {\n");
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ if (u->type != SC_UNIFORM_TYPE_PUSHC)
+ continue;
+ // push constants don't support explicit offsets
+ ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type,
+ u->input.name);
}
ADD(dst, "};\n");
}
@@ -911,6 +972,7 @@ struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
.pass = sc->current_shader->pass,
.values = sc->values,
.num_values = sc->num_values,
+ .push_constants = sc->current_shader->pushc,
.target = target,
.vertex_data = ptr,
.vertex_count = num,
@@ -942,6 +1004,7 @@ struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
.pass = sc->current_shader->pass,
.values = sc->values,
.num_values = sc->num_values,
+ .push_constants = sc->current_shader->pushc,
.compute_groups = {w, h, d},
};
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index 897b2e1ff1..76e242601c 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -191,6 +191,7 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
ra->glsl_version = vk->spirv->glsl_version;
ra->glsl_vulkan = true;
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
+ ra->max_pushc_size = vk->limits.maxPushConstantsSize;
if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
ra->caps |= RA_CAP_COMPUTE;
@@ -1079,6 +1080,12 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &pass_vk->dsLayout,
+ .pushConstantRangeCount = params->push_constants_size ? 1 : 0,
+ .pPushConstantRanges = &(VkPushConstantRange){
+ .stageFlags = stageFlags[params->type],
+ .offset = 0,
+ .size = params->push_constants_size,
+ },
};
VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
@@ -1416,6 +1423,13 @@ static void vk_renderpass_run(struct ra *ra,
vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
+ if (pass->params.push_constants_size) {
+ vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
+ stageFlags[pass->params.type], 0,
+ pass->params.push_constants_size,
+ params->push_constants);
+ }
+
switch (pass->params.type) {
case RA_RENDERPASS_TYPE_COMPUTE:
vkCmdDispatch(cmd->buf, params->compute_groups[0],
@@ -1664,6 +1678,7 @@ static struct ra_fns ra_fns_vk = {
.clear = vk_clear,
.blit = vk_blit,
.uniform_layout = std140_layout,
+ .push_constant_layout = std430_layout,
.renderpass_create = vk_renderpass_create,
.renderpass_destroy = vk_renderpass_destroy_lazy,
.renderpass_run = vk_renderpass_run,