1 files changed, 151 insertions, 108 deletions
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
index 28d4297f6f..14c9444fd3 100644
--- a/video/out/placebo/ra_pl.c
+++ b/video/out/placebo/ra_pl.c
@@ -5,10 +5,11 @@
 #include "utils.h"
 
 struct ra_pl {
-    const struct pl_gpu *gpu;
+    pl_gpu gpu;
+    struct ra_timer_pl *active_timer;
 };
 
-static inline const struct pl_gpu *get_gpu(const struct ra *ra)
+static inline pl_gpu get_gpu(const struct ra *ra)
 {
     struct ra_pl *p = ra->priv;
     return p->gpu;
@@ -16,12 +17,14 @@ static inline const struct pl_gpu *get_gpu(const struct ra *ra)
 
 static struct ra_fns ra_fns_pl;
 
-const struct pl_gpu *ra_pl_get(const struct ra *ra)
+pl_gpu ra_pl_get(const struct ra *ra)
 {
     return ra->fns == &ra_fns_pl ? get_gpu(ra) : NULL;
 }
 
-struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
+static pl_timer get_active_timer(const struct ra *ra);
+
+struct ra *ra_create_pl(pl_gpu gpu, struct mp_log *log)
 {
     assert(gpu);
 
@@ -38,12 +41,14 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
 
     ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD;
 
-    if (gpu->caps & PL_GPU_CAP_COMPUTE)
+    if (gpu->glsl.compute)
         ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
-    if (gpu->caps & PL_GPU_CAP_PARALLEL_COMPUTE)
+    if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
         ra->caps |= RA_CAP_PARALLEL_COMPUTE;
-    if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES)
+    if (gpu->limits.max_variable_comps)
         ra->caps |= RA_CAP_GLOBAL_UNIFORM;
+    if (!gpu->limits.host_cached)
+        ra->caps |= RA_CAP_SLOW_DR;
 
     if (gpu->limits.max_tex_1d_dim)
         ra->caps |= RA_CAP_TEX_1D;
@@ -53,21 +58,22 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
         ra->caps |= RA_CAP_BUF_RO;
     if (gpu->limits.max_ssbo_size)
         ra->caps |= RA_CAP_BUF_RW;
-    if (gpu->limits.min_gather_offset && gpu->limits.max_gather_offset)
+    if (gpu->glsl.min_gather_offset && gpu->glsl.max_gather_offset)
         ra->caps |= RA_CAP_GATHER;
 
     // Semi-hack: assume all textures are blittable if r8 is
-    const struct pl_fmt *r8 = pl_find_named_fmt(gpu, "r8");
+    pl_fmt r8 = pl_find_named_fmt(gpu, "r8");
     if (r8->caps & PL_FMT_CAP_BLITTABLE)
         ra->caps |= RA_CAP_BLIT;
 
     ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
-    ra->max_shmem = gpu->limits.max_shmem_size;
     ra->max_pushc_size = gpu->limits.max_pushc_size;
+    ra->max_compute_group_threads = gpu->glsl.max_group_threads;
+    ra->max_shmem = gpu->glsl.max_shmem_size;
 
     // Set up format wrappers
     for (int i = 0; i < gpu->num_formats; i++) {
-        const struct pl_fmt *plfmt = gpu->formats[i];
+        pl_fmt plfmt = gpu->formats[i];
         static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = {
             [PL_FMT_UNORM]  = RA_CTYPE_UNORM,
             [PL_FMT_UINT]   = RA_CTYPE_UINT,
@@ -108,7 +114,7 @@ static void destroy_ra_pl(struct ra *ra)
     talloc_free(ra);
 }
 
-static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt)
+static struct ra_format *map_fmt(struct ra *ra, pl_fmt plfmt)
 {
     for (int i = 0; i < ra->num_formats; i++) {
         if (ra->formats[i]->priv == plfmt)
@@ -119,8 +125,7 @@ static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt)
     return NULL;
 }
 
-bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
-                   struct ra_tex *out_tex)
+bool mppl_wrap_tex(struct ra *ra, pl_tex pltex, struct ra_tex *out_tex)
 {
     if (!pltex)
         return false;
@@ -139,8 +144,9 @@ bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
             .blit_dst = pltex->params.blit_dst,
             .host_mutable = pltex->params.host_writable,
             .downloadable = pltex->params.host_readable,
-            .src_linear = pltex->params.sample_mode == PL_TEX_SAMPLE_LINEAR,
-            .src_repeat = pltex->params.address_mode == PL_TEX_ADDRESS_REPEAT,
+            // These don't exist upstream, so just pick something reasonable
+            .src_linear = pltex->params.format->caps & PL_FMT_CAP_LINEAR,
+            .src_repeat = false,
         },
         .priv = (void *) pltex,
     };
@@ -151,34 +157,8 @@ bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
 static struct ra_tex *tex_create_pl(struct ra *ra,
                                     const struct ra_tex_params *params)
 {
-    const struct pl_gpu *gpu = get_gpu(ra);
-
-    // Check size limits
-    bool ok = false;
-    switch (params->dimensions) {
-    case 1:
-        ok = params->w <= gpu->limits.max_tex_1d_dim;
-        break;
-
-    case 2:
-        ok = params->w <= gpu->limits.max_tex_2d_dim &&
-             params->h <= gpu->limits.max_tex_2d_dim;
-        break;
-
-    case 3:
-        ok = params->w <= gpu->limits.max_tex_2d_dim &&
-             params->h <= gpu->limits.max_tex_2d_dim &&
-             params->d <= gpu->limits.max_tex_2d_dim;
-        break;
-    };
-
-    if (!ok) {
-        MP_ERR(ra, "Texture size %dx%dx%d exceeds dimension limits!\n",
-               params->w, params->h, params->d);
-        return NULL;
-    }
-
-    const struct pl_tex *pltex = pl_tex_create(gpu, &(struct pl_tex_params) {
+    pl_gpu gpu = get_gpu(ra);
+    pl_tex pltex = pl_tex_create(gpu, &(struct pl_tex_params) {
         .w = params->w,
         .h = params->dimensions >= 2 ? params->h : 0,
         .d = params->dimensions >= 3 ? params->d : 0,
@@ -190,10 +170,6 @@ static struct ra_tex *tex_create_pl(struct ra *ra,
         .blit_dst = params->blit_dst || params->render_dst,
         .host_writable = params->host_mutable,
         .host_readable = params->downloadable,
-        .sample_mode = params->src_linear ? PL_TEX_SAMPLE_LINEAR
-                                          : PL_TEX_SAMPLE_NEAREST,
-        .address_mode = params->src_repeat ? PL_TEX_ADDRESS_REPEAT
-                                           : PL_TEX_ADDRESS_CLAMP,
         .initial_data = params->initial_data,
     });
 
@@ -204,6 +180,10 @@ static struct ra_tex *tex_create_pl(struct ra *ra,
         return NULL;
     }
 
+    // Keep track of these, so we can correctly bind them later
+    ratex->params.src_repeat = params->src_repeat;
+    ratex->params.src_linear = params->src_linear;
+
     return ratex;
 }
 
@@ -212,48 +192,47 @@ static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex)
     if (!tex)
         return;
 
-    pl_tex_destroy(get_gpu(ra), (const struct pl_tex **) &tex->priv);
+    pl_tex_destroy(get_gpu(ra), (pl_tex *) &tex->priv);
     talloc_free(tex);
 }
 
-static int texel_stride_w(size_t stride, const struct pl_tex *tex)
-{
-    size_t texel_size = tex->params.format->texel_size;
-    int texels = stride / texel_size;
-    assert(texels * texel_size == stride);
-    return texels;
-}
-
 static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params)
 {
-    const struct pl_tex *tex = params->tex->priv;
+    pl_gpu gpu = get_gpu(ra);
+    pl_tex tex = params->tex->priv;
     struct pl_tex_transfer_params pl_params = {
         .tex = tex,
         .buf = params->buf ? params->buf->priv : NULL,
         .buf_offset = params->buf_offset,
         .ptr = (void *) params->src,
+        .timer = get_active_timer(ra),
     };
 
+    pl_buf staging = NULL;
     if (params->tex->params.dimensions == 2) {
-        pl_params.stride_w = texel_stride_w(params->stride, tex);
         if (params->rc) {
             pl_params.rc = (struct pl_rect3d) {
                 .x0 = params->rc->x0, .x1 = params->rc->x1,
                 .y0 = params->rc->y0, .y1 = params->rc->y1,
             };
         }
+
+        pl_params.row_pitch = params->stride;
     }
 
-    return pl_tex_upload(get_gpu(ra), &pl_params);
+    bool ok = pl_tex_upload(gpu, &pl_params);
+    pl_buf_destroy(gpu, &staging);
+    return ok;
 }
 
 static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params)
 {
-    const struct pl_tex *tex = params->tex->priv;
+    pl_tex tex = params->tex->priv;
     struct pl_tex_transfer_params pl_params = {
         .tex = tex,
         .ptr = params->dst,
-        .stride_w = texel_stride_w(params->stride, tex),
+        .timer = get_active_timer(ra),
+        .row_pitch = params->stride,
     };
 
     return pl_tex_download(get_gpu(ra), &pl_params);
@@ -262,28 +241,10 @@ static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params
 static struct ra_buf *buf_create_pl(struct ra *ra,
                                     const struct ra_buf_params *params)
 {
-    static const enum pl_buf_type buf_type[] = {
-        [RA_BUF_TYPE_TEX_UPLOAD]     = PL_BUF_TEX_TRANSFER,
-        [RA_BUF_TYPE_SHADER_STORAGE] = PL_BUF_STORAGE,
-        [RA_BUF_TYPE_UNIFORM]        = PL_BUF_UNIFORM,
-        [RA_BUF_TYPE_SHARED_MEMORY]  = 0,
-    };
-
-    const struct pl_gpu *gpu = get_gpu(ra);
-    size_t max_size[] = {
-        [PL_BUF_TEX_TRANSFER] = gpu->limits.max_xfer_size,
-        [PL_BUF_UNIFORM]      = gpu->limits.max_ubo_size,
-        [PL_BUF_STORAGE]      = gpu->limits.max_ssbo_size,
-    };
-
-    if (params->size > max_size[buf_type[params->type]]) {
-        MP_ERR(ra, "Buffer size %zu exceeds size limits!\n", params->size);
-        return NULL;
-    }
-
-    const struct pl_buf *plbuf = pl_buf_create(gpu, &(struct pl_buf_params) {
-        .type = buf_type[params->type],
+    pl_buf plbuf = pl_buf_create(get_gpu(ra), &(struct pl_buf_params) {
         .size = params->size,
+        .uniform = params->type == RA_BUF_TYPE_UNIFORM,
+        .storable = params->type == RA_BUF_TYPE_SHADER_STORAGE,
         .host_mapped = params->host_mapped,
         .host_writable = params->host_mutable,
         .initial_data = params->initial_data,
@@ -308,7 +269,7 @@ static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf)
     if (!buf)
         return;
 
-    pl_buf_destroy(get_gpu(ra), (const struct pl_buf **) &buf->priv);
+    pl_buf_destroy(get_gpu(ra), (pl_buf *) &buf->priv);
     talloc_free(buf);
 }
 
@@ -348,7 +309,14 @@ static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
         pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h);
     }
 
-    pl_tex_blit(get_gpu(ra), dst->priv, src->priv, pldst, plsrc);
+    pl_tex_blit(get_gpu(ra), &(struct pl_tex_blit_params) {
+        .src = src->priv,
+        .dst = dst->priv,
+        .src_rc = plsrc,
+        .dst_rc = pldst,
+        .sample_mode = src->params.src_linear ? PL_TEX_SAMPLE_LINEAR
+                                              : PL_TEX_SAMPLE_NEAREST,
+    });
 }
 
 static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = {
@@ -418,7 +386,7 @@ static int desc_namespace_pl(struct ra *ra, enum ra_vartype type)
 }
 
 struct pass_priv {
-    const struct pl_pass *pl_pass;
+    pl_pass pass;
     uint16_t *inp_index; // index translation map
     // Space to hold the descriptor bindings and variable updates
     struct pl_desc_binding *binds;
@@ -430,7 +398,7 @@ static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
                                     const struct ra_renderpass_params *params)
 {
     void *tmp = talloc_new(NULL);
-    const struct pl_gpu *gpu = get_gpu(ra);
+    pl_gpu gpu = get_gpu(ra);
     struct ra_renderpass *pass = NULL;
 
     static const enum pl_pass_type pass_type[] = {
@@ -482,8 +450,6 @@ static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
         .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE
                             ? params->compute_shader
                             : params->frag_shader,
-        .cached_program = params->cached_program.start,
-        .cached_program_len = params->cached_program.len,
     };
 
     struct pl_blend_params blend_params;
@@ -492,17 +458,17 @@ static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
         pl_params.vertex_shader = params->vertex_shader;
         pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST;
         pl_params.vertex_stride = params->vertex_stride;
-        pl_params.target_dummy.params.format = params->target_format->priv;
         pl_params.load_target = !params->invalidate_target;
+        pl_params.target_format = params->target_format->priv;
 
         if (params->enable_blend) {
             pl_params.blend_params = &blend_params;
             blend_params = (struct pl_blend_params) {
                 // Same enum order as ra_blend
-                .src_rgb = (enum ra_blend) params->blend_src_rgb,
-                .dst_rgb = (enum ra_blend) params->blend_dst_rgb,
-                .src_alpha = (enum ra_blend) params->blend_src_alpha,
-                .dst_alpha = (enum ra_blend) params->blend_dst_alpha,
+                .src_rgb = (enum pl_blend_mode) params->blend_src_rgb,
+                .dst_rgb = (enum pl_blend_mode) params->blend_dst_rgb,
+                .src_alpha = (enum pl_blend_mode) params->blend_src_alpha,
+                .dst_alpha = (enum pl_blend_mode) params->blend_dst_alpha,
             };
         }
 
@@ -527,8 +493,8 @@ static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
         }
     }
 
-    priv->pl_pass = pl_pass_create(gpu, &pl_params);
-    if (!priv->pl_pass)
+    priv->pass = pl_pass_create(gpu, &pl_params);
+    if (!priv->pass)
         goto error;
 
     pass = talloc_ptrtype(NULL, pass);
@@ -537,11 +503,6 @@ static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
         .priv = talloc_steal(pass, priv),
     };
 
-    pass->params.cached_program = (struct bstr) {
-        .start = (void *) priv->pl_pass->params.cached_program,
-        .len = priv->pl_pass->params.cached_program_len,
-    };
-
     // fall through
 error:
     talloc_free(tmp);
@@ -554,7 +515,7 @@ static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass)
         return;
 
     struct pass_priv *priv = pass->priv;
-    pl_pass_destroy(get_gpu(ra), (const struct pl_pass **) &priv->pl_pass);
+    pl_pass_destroy(get_gpu(ra), (pl_pass *) &priv->pass);
     talloc_free(pass);
 }
 
@@ -573,17 +534,23 @@ static void renderpass_run_pl(struct ra *ra,
                 .data = val->data,
             });
         } else {
-            struct pl_desc_binding bind;
+            struct pl_desc_binding bind = {0};
             switch (inp->type) {
             case RA_VARTYPE_TEX:
-            case RA_VARTYPE_IMG_W:
-                bind.object = (* (struct ra_tex **) val->data)->priv;
+            case RA_VARTYPE_IMG_W: {
+                struct ra_tex *tex = *((struct ra_tex **) val->data);
+                bind.object = tex->priv;
+                bind.sample_mode = tex->params.src_linear ? PL_TEX_SAMPLE_LINEAR
+                                                          : PL_TEX_SAMPLE_NEAREST;
+                bind.address_mode = tex->params.src_repeat ? PL_TEX_ADDRESS_REPEAT
+                                                           : PL_TEX_ADDRESS_CLAMP;
                 break;
+            }
             case RA_VARTYPE_BUF_RO:
             case RA_VARTYPE_BUF_RW:
                 bind.object = (* (struct ra_buf **) val->data)->priv;
                 break;
-            default: abort();
+            default: MP_ASSERT_UNREACHABLE();
             };
 
             p->binds[p->inp_index[val->index]] = bind;
@@ -591,14 +558,15 @@ static void renderpass_run_pl(struct ra *ra,
     }
 
     struct pl_pass_run_params pl_params = {
-        .pass = p->pl_pass,
+        .pass = p->pass,
         .var_updates = p->varups,
         .num_var_updates = p->num_varups,
         .desc_bindings = p->binds,
         .push_constants = params->push_constants,
+        .timer = get_active_timer(ra),
     };
 
-    if (p->pl_pass->params.type == PL_PASS_RASTER) {
+    if (p->pass->params.type == PL_PASS_RASTER) {
         pl_params.target = params->target->priv;
         pl_params.viewport = mp_rect2d_to_pl(params->viewport);
         pl_params.scissors = mp_rect2d_to_pl(params->scissors);
@@ -612,6 +580,77 @@ static void renderpass_run_pl(struct ra *ra,
     pl_pass_run(get_gpu(ra), &pl_params);
 }
 
+struct ra_timer_pl {
+    // Because libpplacebo only supports one operation per timer, we need
+    // to use multiple pl_timers to sum up multiple passes/transfers
+    pl_timer *timers;
+    int num_timers;
+    int idx_timers;
+};
+
+static ra_timer *timer_create_pl(struct ra *ra)
+{
+    struct ra_timer_pl *t = talloc_zero(ra, struct ra_timer_pl);
+    return t;
+}
+
+static void timer_destroy_pl(struct ra *ra, ra_timer *timer)
+{
+    pl_gpu gpu = get_gpu(ra);
+    struct ra_timer_pl *t = timer;
+
+    for (int i = 0; i < t->num_timers; i++)
+        pl_timer_destroy(gpu, &t->timers[i]);
+
+    talloc_free(t);
+}
+
+static void timer_start_pl(struct ra *ra, ra_timer *timer)
+{
+    struct ra_pl *p = ra->priv;
+    struct ra_timer_pl *t = timer;
+
+    // There's nothing easy we can do in this case, since libplacebo only
+    // supports one timer object per operation; so just ignore "inner" timers
+    // when the user is nesting different timer queries
+    if (p->active_timer)
+        return;
+
+    p->active_timer = t;
+    t->idx_timers = 0;
+}
+
+static uint64_t timer_stop_pl(struct ra *ra, ra_timer *timer)
+{
+    struct ra_pl *p = ra->priv;
+    struct ra_timer_pl *t = timer;
+
+    if (p->active_timer != t)
+        return 0;
+
+    p->active_timer = NULL;
+
+    // Sum up all of the active results
+    uint64_t res = 0;
+    for (int i = 0; i < t->idx_timers; i++)
+        res += pl_timer_query(p->gpu, t->timers[i]);
+
+    return res;
+}
+
+static pl_timer get_active_timer(const struct ra *ra)
+{
+    struct ra_pl *p = ra->priv;
+    if (!p->active_timer)
+        return NULL;
+
+    struct ra_timer_pl *t = p->active_timer;
+    if (t->idx_timers == t->num_timers)
+        MP_TARRAY_APPEND(t, t->timers, t->num_timers, pl_timer_create(p->gpu));
+
+    return t->timers[t->idx_timers++];
+}
+
 static struct ra_fns ra_fns_pl = {
     .destroy                = destroy_ra_pl,
     .tex_create             = tex_create_pl,
@@ -630,5 +669,9 @@ static struct ra_fns ra_fns_pl = {
     .renderpass_create      = renderpass_create_pl,
     .renderpass_destroy     = renderpass_destroy_pl,
     .renderpass_run         = renderpass_run_pl,
+    .timer_create           = timer_create_pl,
+    .timer_destroy          = timer_destroy_pl,
+    .timer_start            = timer_start_pl,
+    .timer_stop             = timer_stop_pl,
 };