From b16d8865b70b4459f87c519159b5e400a9ae7e00 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 16 Jun 2020 02:19:13 +0200 Subject: vo_gpu: placebo: add fallback code for stride mismatch For cases in which the requirements of the GPU API prevent directly uploading a texture with a given stride, we need to fix the stride manually in host memory. This incurs an extra memcpy, but there's not much we can do about it. (Even in `ra_gl` land, the driver will just hide this memcpy from the user) Note: This code could be done better. It could only copy as many texels as needed, and it could pick a stride that's a multiple of `gpu->limits.align_tex_xfer_stride` for better performance. Patches welcome (tm) Fixes #7759 --- video/out/placebo/ra_pl.c | 64 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c index 18b7467304..f8df590511 100644 --- a/video/out/placebo/ra_pl.c +++ b/video/out/placebo/ra_pl.c @@ -221,16 +221,9 @@ static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex) talloc_free(tex); } -static int texel_stride_w(size_t stride, const struct pl_tex *tex) -{ - size_t texel_size = tex->params.format->texel_size; - int texels = stride / texel_size; - assert(texels * texel_size == stride); - return texels; -} - static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params) { + const struct pl_gpu *gpu = get_gpu(ra); const struct pl_tex *tex = params->tex->priv; struct pl_tex_transfer_params pl_params = { .tex = tex, @@ -242,32 +235,79 @@ static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *para #endif }; + const struct pl_buf *staging = NULL; + if (params->tex->params.dimensions == 2) { - pl_params.stride_w = texel_stride_w(params->stride, tex); + size_t texel_size = tex->params.format->texel_size; + pl_params.stride_w = params->stride / texel_size; + size_t stride = pl_params.stride_w * texel_size; + int lines = tex->params.h; if (params->rc) { pl_params.rc = (struct pl_rect3d) { .x0 = params->rc->x0, .x1 = params->rc->x1, .y0 = params->rc->y0, .y1 = params->rc->y1, }; + lines = pl_rect_h(pl_params.rc); + } + + if (stride != params->stride) { + // Fall back to uploading via a staging buffer prepared in CPU + staging = pl_buf_create(gpu, &(struct pl_buf_params) { + .type = PL_BUF_TEX_TRANSFER, + .size = lines * stride, + .memory_type = PL_BUF_MEM_HOST, + .host_mapped = true, + }); + if (!staging) + return false; + + const uint8_t *src = params->buf ? params->buf->data : params->src; + assert(src); + for (int y = 0; y < lines; y++) + memcpy(staging->data + y * stride, src + y * params->stride, stride); + + pl_params.ptr = NULL; + pl_params.buf = staging; + pl_params.buf_offset = 0; } } - return pl_tex_upload(get_gpu(ra), &pl_params); + bool ok = pl_tex_upload(gpu, &pl_params); + pl_buf_destroy(gpu, &staging); + return ok; } static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params) { const struct pl_tex *tex = params->tex->priv; + size_t texel_size = tex->params.format->texel_size; struct pl_tex_transfer_params pl_params = { .tex = tex, .ptr = params->dst, - .stride_w = texel_stride_w(params->stride, tex), + .stride_w = params->stride / texel_size, #if PL_API_VER >= 60 .timer = get_active_timer(ra), #endif }; - return pl_tex_download(get_gpu(ra), &pl_params); + uint8_t *staging = NULL; + size_t stride = pl_params.stride_w * texel_size; + if (stride != params->stride) { + staging = talloc_size(NULL, tex->params.h * stride); + pl_params.ptr = staging; + } + + bool ok = pl_tex_download(get_gpu(ra), &pl_params); + if (ok && staging) { + for (int y = 0; y < tex->params.h; y++) { + memcpy((uint8_t *) params->dst + y * params->stride, + staging + y * stride, + stride); + } + } + + talloc_free(staging); + return ok; } static struct ra_buf *buf_create_pl(struct ra *ra, -- cgit v1.2.3