From 8e6704acdbe020aacc71dbe15981651d6bcca25e Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 17 Jun 2016 23:11:05 +0200 Subject: sub, vo_opengl: use packed sub-bitmaps directly if available The previous few commits changed sd_lavc.c's output to packed RGB sub-images. In particular, this means all sub-bitmaps are part of a larger, single bitmap. Change the vo_opengl OSD code such that it can make use of this, and upload the pre-packed image, instead of packing and copying them again. This complicates the upload code a bit (4 code paths due to messy PBO handling). The plan is to make sub-bitmaps always packed, but some more work is required to reach this point. The plan is to pack libass images as well. Since this implies a copy, this will make it easy to refcount the result. (This is all targeted towards vo_opengl. Other VOs, vo_xv, vo_x11, and vo_wayland in particular, will become less efficient. Although at least vo_vdpau and vo_direct3d could be switched to the new method as well.) --- sub/img_convert.c | 2 + sub/osd.h | 16 ++++++++ sub/sd_lavc.c | 13 +++++- video/out/opengl/osd.c | 108 ++++++++++++++++++++++++++++++++++++------------- 4 files changed, 110 insertions(+), 29 deletions(-) diff --git a/sub/img_convert.c b/sub/img_convert.c index 2015e49ca6..348811e773 100644 --- a/sub/img_convert.c +++ b/sub/img_convert.c @@ -80,6 +80,7 @@ bool osd_scale_rgba(struct osd_conv_cache *c, struct sub_bitmaps *imgs) talloc_free(c->parts); imgs->parts = c->parts = talloc_array(c, struct sub_bitmap, src.num_parts); + imgs->packed = NULL; // Note: we scale all parts, since most likely all need scaling anyway, and // to get a proper copy of all data in the imgs list. 
@@ -158,6 +159,7 @@ bool osd_conv_ass_to_rgba(struct osd_conv_cache *c, struct sub_bitmaps *imgs) imgs->format = SUBBITMAP_RGBA; imgs->parts = c->part; imgs->num_parts = num_bb; + imgs->packed = NULL; size_t newsize = 0; for (int n = 0; n < num_bb; n++) { diff --git a/sub/osd.h b/sub/osd.h index b50b72c587..7cfc695873 100644 --- a/sub/osd.h +++ b/sub/osd.h @@ -42,6 +42,11 @@ struct sub_bitmap { int x, y; int dw, dh; + // If the containing struct sub_bitmaps has the packed field set, then this + // is the position within the source. (Strictly speaking this is redundant + // with the bitmap pointer.) + int src_x, src_y; + struct { uint32_t color; } libass; @@ -60,6 +65,17 @@ struct sub_bitmaps { struct sub_bitmap *parts; int num_parts; + // Packed representation of the bitmap data. If non-NULL, then the + // parts[].bitmap pointer points into the image data here (and stride will + // correspond to packed->stride[0]). + // SUBBITMAP_RGBA: IMGFMT_BGRA (exact match) + // Other formats have this set to NULL. + struct mp_image *packed; + + // Bounding box for the packed image. All parts will be within the bounding + // box. (The origin of the box is at (0,0).) 
+ int packed_w, packed_h; + int change_id; // Incremented on each change }; diff --git a/sub/sd_lavc.c b/sub/sd_lavc.c index cc72efc54d..b7420d33c6 100644 --- a/sub/sd_lavc.c +++ b/sub/sd_lavc.c @@ -44,6 +44,7 @@ struct sub { struct sub_bitmap *inbitmaps; int count; struct mp_image *data; + int bound_w, bound_h; int src_w, src_h; double pts; double endpts; @@ -254,7 +255,10 @@ static void read_sub_bitmaps(struct sd *sd, struct sub *sub) struct pos bb[2]; packer_get_bb(priv->packer, bb); - if (!sub->data || sub->data->w < bb[1].x || sub->data->h < bb[1].y) { + sub->bound_w = bb[1].x; + sub->bound_h = bb[1].y; + + if (!sub->data || sub->data->w < sub->bound_w || sub->data->h < sub->bound_h) { talloc_free(sub->data); sub->data = mp_image_alloc(IMGFMT_BGRA, priv->packer->w, priv->packer->h); if (!sub->data) { @@ -279,6 +283,8 @@ static void read_sub_bitmaps(struct sd *sd, struct sub *sub) b->h = r->h; b->x = r->x; b->y = r->y; + b->src_x = pos.x; + b->src_y = pos.y; b->stride = sub->data->stride[0]; b->bitmap = sub->data->planes[0] + pos.y * b->stride + pos.x * 4; @@ -307,6 +313,8 @@ static void read_sub_bitmaps(struct sd *sd, struct sub *sub) } b->bitmap = (char*)b->bitmap - extend * b->stride - extend * 4; + b->src_x -= extend; + b->src_y -= extend; b->x -= extend; b->y -= extend; b->w += extend * 2; @@ -443,6 +451,9 @@ static void get_bitmaps(struct sd *sd, struct mp_osd_res d, double pts, if (priv->displayed_id != current->id) res->change_id++; priv->displayed_id = current->id; + res->packed = current->data; + res->packed_w = current->bound_w; + res->packed_h = current->bound_h; res->format = SUBBITMAP_RGBA; double video_par = 0; diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index 2d4d51ab6a..7b1ec162fd 100644 --- a/video/out/opengl/osd.c +++ b/video/out/opengl/osd.c @@ -17,6 +17,8 @@ #include #include +#include + #include #include "video/out/bitmap_packer.h" @@ -64,6 +66,7 @@ struct mpgl_osd { struct mp_log *log; struct osd_state *osd; GL *gl; + 
GLint max_tex_wh; bool use_pbo; struct mpgl_osd_part *parts[MAX_OSD_PARTS]; const struct gl_format *fmt_table[SUBBITMAP_COUNT]; @@ -78,9 +81,6 @@ struct mpgl_osd { struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd) { - GLint max_texture_size; - gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); - struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); *ctx = (struct mpgl_osd) { .log = log, @@ -89,6 +89,8 @@ struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd .scratch = talloc_zero_size(ctx, 1), }; + gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ctx->max_tex_wh); + ctx->fmt_table[SUBBITMAP_LIBASS] = gl_find_unorm_format(gl, 1, 1); ctx->fmt_table[SUBBITMAP_RGBA] = gl_find_unorm_format(gl, 1, 4); @@ -96,8 +98,8 @@ struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd struct mpgl_osd_part *p = talloc_ptrtype(ctx, p); *p = (struct mpgl_osd_part) { .packer = talloc_struct(p, struct bitmap_packer, { - .w_max = max_texture_size, - .h_max = max_texture_size, + .w_max = ctx->max_tex_wh, + .h_max = ctx->max_tex_wh, }), }; ctx->parts[n] = p; @@ -163,16 +165,36 @@ static bool upload(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, goto done; } } else { - if (!osd->upload) - osd->upload = talloc_size(NULL, buffer_size); - data = osd->upload; - texdata = data; + if (!imgs->packed) { + if (!osd->upload) + osd->upload = talloc_size(NULL, buffer_size); + data = osd->upload; + texdata = data; + } } - struct pos bb[2]; - packer_get_bb(osd->packer, bb); - size_t stride = osd->w * pix_stride; - packer_copy_subbitmaps(osd->packer, imgs, data, pix_stride, stride); + int copy_w = 0; + int copy_h = 0; + size_t stride = 0; + if (imgs->packed) { + copy_w = imgs->packed_w; + copy_h = imgs->packed_h; + stride = imgs->packed->stride[0]; + texdata = imgs->packed->planes[0]; + if (pbo) { + memcpy_pic(data, texdata, pix_stride * copy_w, copy_h, + osd->w * pix_stride, stride); + stride = osd->w * pix_stride; + texdata = 
NULL; + } + } else { + struct pos bb[2]; + packer_get_bb(osd->packer, bb); + copy_w = bb[1].x; + copy_h = bb[1].y; + stride = osd->w * pix_stride; + packer_copy_subbitmaps(osd->packer, imgs, data, pix_stride, stride); + } if (pbo) { if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) { @@ -182,7 +204,7 @@ static bool upload(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, } gl_upload_tex(gl, GL_TEXTURE_2D, fmt->format, fmt->type, texdata, stride, - bb[0].x, bb[0].y, bb[1].x - bb[0].x, bb[1].y - bb[0].y); + 0, 0, copy_w, copy_h); if (pbo) gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -196,17 +218,42 @@ done: return success; } +static int next_pow2(int v) +{ + for (int x = 0; x < 30; x++) { + if ((1 << x) >= v) + return 1 << x; + } + return INT_MAX; +} + static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, struct sub_bitmaps *imgs) { GL *gl = ctx->gl; - // assume 2x2 filter on scaling - osd->packer->padding = imgs->scaled; - int r = packer_pack_from_subbitmaps(osd->packer, imgs); - if (r < 0) { + int req_w = 0; + int req_h = 0; + + if (imgs->packed) { + req_w = next_pow2(imgs->packed_w); + req_h = next_pow2(imgs->packed_h); + } else { + // assume 2x2 filter on scaling + osd->packer->padding = imgs->scaled; + int r = packer_pack_from_subbitmaps(osd->packer, imgs); + if (r < 0) { + MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " + "supported size %dx%d.\n", osd->packer->w_max, osd->packer->h_max); + return false; + } + req_w = osd->packer->w; + req_h = osd->packer->h; + } + + if (req_w > ctx->max_tex_wh || req_h > ctx->max_tex_wh) { MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " - "supported size %dx%d.\n", osd->packer->w_max, osd->packer->h_max); + "supported size %dx%d.\n", ctx->max_tex_wh, ctx->max_tex_wh); return false; } @@ -218,12 +265,10 @@ static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, gl->BindTexture(GL_TEXTURE_2D, osd->texture); - if (osd->packer->w > osd->w || osd->packer->h > 
osd->h - || osd->format != imgs->format) - { + if (req_w > osd->w || req_h > osd->h || osd->format != imgs->format) { osd->format = imgs->format; - osd->w = FFMAX(32, osd->packer->w); - osd->h = FFMAX(32, osd->packer->h); + osd->w = FFMAX(32, req_w); + osd->h = FFMAX(32, req_h); gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, osd->w, osd->h, 0, fmt->format, fmt->type, NULL); @@ -261,18 +306,26 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; + bool ok = true; if (imgs->change_id != osd->change_id) { if (!upload_osd(ctx, osd, imgs)) - osd->packer->count = 0; + ok = false; osd->change_id = imgs->change_id; ctx->change_counter += 1; } - osd->num_subparts = osd->packer->count; + osd->num_subparts = ok ? imgs->num_parts : 0; MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); memcpy(osd->subparts, imgs->parts, osd->num_subparts * sizeof(osd->subparts[0])); + + if (!imgs->packed) { + for (int n = 0; n < osd->num_subparts; n++) { + osd->subparts[n].src_x = osd->packer->result[n].x; + osd->subparts[n].src_y = osd->packer->result[n].y; + } + } } static void write_quad(struct vertex *va, struct gl_transform t, @@ -300,7 +353,6 @@ static int generate_verts(struct mpgl_osd_part *part, struct gl_transform t) for (int n = 0; n < part->num_subparts; n++) { struct sub_bitmap *b = &part->subparts[n]; - struct pos pos = part->packer->result[n]; struct vertex *va = part->vertices; // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it @@ -311,7 +363,7 @@ static int generate_verts(struct mpgl_osd_part *part, struct gl_transform t) write_quad(&va[n * 6], t, b->x, b->y, b->x + b->dw, b->y + b->dh, - pos.x, pos.y, pos.x + b->w, pos.y + b->h, + b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, part->w, part->h, color); } -- cgit v1.2.3