summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author wm4 <wm4@nowhere> 2020-05-09 18:01:07 +0200
committer wm4 <wm4@nowhere> 2020-05-09 18:02:57 +0200
commit c1a961ad78b6d1da339e622c723d753a80687824 (patch)
tree 4d3331052d5054603dfe780d291919dae09caedb
parent 9190b3c4694d9fbbe23429a0402a3a236d16e4fb (diff)
download mpv-c1a961ad78b6d1da339e622c723d753a80687824.tar.bz2
mpv-c1a961ad78b6d1da339e622c723d753a80687824.tar.xz
draw_bmp: rewrite
draw_bmp.c is the software blender for subtitles and OSD. It's used by encoding mode (burning subtitles), and some VOs, like vo_drm, vo_x11, vo_xv, and possibly more. This changes the algorithm from "upsample the video to 4:4:4, then blend" to "downsample the OSD, then blend directly into the video". This has far-reaching consequences for its internals, and results in an effective rewrite. Since I wanted to avoid un-premultiplying, all blending is done with premultiplied alpha. That's actually the sane thing to do. The old code just didn't do it, because it's very weird in YUV fixed point. Essentially, you'd have to compensate for the chroma centering constant by subtracting src_alpha/255*128. This seemed so hairy (especially with correct rounding and high bit depths involved) that I went for using float. I think it turned out mostly OK, although it's more complex and less maintainable than before. reinit() is certainly a bit too long. While it should be possible to optimize the RGB path more (for example by blending directly instead of doing the stupid float conversion), the new code is probably slower than the old one. vo_xv users probably lose out here, because it takes the slowest path (due to subsampling requirements and using YUV). Why this rewrite? Nobody knows. I simply forgot the reason. But you'll have it anyway. Whether or not this would have required a full rewrite, at least it supports target alpha now (you can for example hardsub onto transparent PNGs, if you ever wanted to use mpv for this). Remove the check in vf_sub. The new draw_bmp.c is not as reliant on libswscale anymore (mostly uses repack.c now), and osd.c shows an error message on missing support instead now. Formats with a chroma subsampling factor of 4 are not supported, because FFmpeg doesn't provide pixfmt definitions for their alpha variants. We could provide those ourselves (relatively trivial), but why bother.
-rw-r--r-- sub/draw_bmp.c 1055
-rw-r--r-- sub/draw_bmp.h 3
-rw-r--r-- sub/osd.c 3
-rw-r--r-- test/ref/draw_bmp.txt 218
-rw-r--r-- test/repack.c 67
-rw-r--r-- video/filter/vf_sub.c 3
6 files changed, 938 insertions, 411 deletions
diff --git a/sub/draw_bmp.c b/sub/draw_bmp.c
index ba027838ec..e1caea1e0a 100644
--- a/sub/draw_bmp.c
+++ b/sub/draw_bmp.c
@@ -21,12 +21,11 @@
#include <math.h>
#include <inttypes.h>
-#include <libswscale/swscale.h>
-
#include "common/common.h"
#include "draw_bmp.h"
#include "img_convert.h"
#include "video/mp_image.h"
+#include "video/repack.h"
#include "video/sws_utils.h"
#include "video/img_format.h"
#include "video/csputils.h"
@@ -36,517 +35,761 @@ const bool mp_draw_sub_formats[SUBBITMAP_COUNT] = {
[SUBBITMAP_RGBA] = true,
};
-struct sub_cache {
- struct mp_image *i, *a;
-};
-
struct part {
int change_id;
- int imgfmt;
- enum mp_csp colorspace;
- enum mp_csp_levels levels;
+ // Sub-bitmaps scaled to final sizes.
int num_imgs;
- struct sub_cache *imgs;
+ struct mp_image **imgs;
+};
+
+// Slice width in pixels. Must be a power of 2. A slice is 1 pixel high, but
+// mark_rect() effectively operates on multiples of chroma sized macro-pixels.
+// (E.g. 4:2:0 -> every second line is the same as the previous one, and
+// x0%2==x1%2==0.)
+#define SLICE_W 256u
+
+// Whether to scale in tiles. Faster, but can't use correct chroma position.
+// Should be a runtime option. SLICE_W is used as tile width. The tile size
+// should probably be small; too small or too big will cause overhead when
+// scaling.
+#define SCALE_IN_TILES 1
+#define TILE_H 4u
+
+struct slice {
+ uint16_t x0, x1;
};
struct mp_draw_sub_cache
{
- struct part *parts[MAX_OSD_PARTS];
- struct mp_image *upsample_img;
- struct mp_image upsample_temp;
-};
+ // Possibly cached parts. Also implies what's in the video_overlay.
+ struct part parts[MAX_OSD_PARTS];
+ int64_t change_id;
+ struct mp_image_params params; // target image params
-static struct part *get_cache(struct mp_draw_sub_cache *cache,
- struct sub_bitmaps *sbs, struct mp_image *format);
-static bool get_sub_area(struct mp_rect bb, struct mp_image *temp,
- struct sub_bitmap *sb, struct mp_image *out_area,
- int *out_src_x, int *out_src_y);
+ int w, h; // like params.w/h, but rounded up to chroma
+ unsigned align_x, align_y; // alignment for all video pixels
-#define CONDITIONAL 1
+ struct mp_image *rgba_overlay; // all OSD in RGBA
+ struct mp_image *video_overlay; // rgba_overlay converted to video colorspace
+ struct mp_image *alpha_overlay; // alpha plane ref. to video_overlay
+ struct mp_image *calpha_overlay; // alpha_overlay scaled to chroma plane size
-#define BLEND_CONST_ALPHA(TYPE) \
- TYPE *dst_r = dst_rp; \
- for (int x = 0; x < w; x++) { \
- uint32_t srcap = srca_r[x]; \
- if (CONDITIONAL && !srcap) continue; \
- srcap *= srcamul; /* now 0..65025 */ \
- dst_r[x] = (srcp * srcap + dst_r[x] * (65025 - srcap) + 32512) / 65025; \
- }
+ unsigned s_w; // number of slices per line
+ struct slice *slices; // slices[y * s_w + x / SLICE_W]
+ bool any_osd;
+
+ struct mp_sws_context *rgba_to_overlay; // scaler for rgba -> video csp.
+ struct mp_sws_context *alpha_to_calpha; // scaler for alpha_overlay -> calpha_overlay
+ bool scale_in_tiles;
-// dst = srcp * (srca * srcamul) + dst * (1 - (srca * srcamul))
-static void blend_const_alpha(void *dst, int dst_stride, int srcp,
- uint8_t *srca, int srca_stride, uint8_t srcamul,
- int w, int h, int bytes)
+ struct mp_sws_context *sub_scale; // scaler for SUBBITMAP_RGBA
+
+ struct mp_repack *overlay_to_f32; // convert video_overlay to float
+ struct mp_image *overlay_tmp; // slice in float32
+
+ struct mp_repack *calpha_to_f32; // convert calpha_overlay to float
+ struct mp_image *calpha_tmp; // slice in float32
+
+ struct mp_repack *video_to_f32; // convert video to float
+ struct mp_repack *video_from_f32; // convert float back to video
+ struct mp_image *video_tmp; // slice in float32
+
+ struct mp_sws_context *premul; // video -> premultiplied video
+ struct mp_sws_context *unpremul; // reverse
+ struct mp_image *premul_tmp;
+
+ // Function that works on the _f32 data.
+ void (*blend_line)(void *dst, void *src, void *src_a, int w);
+};
+
+static void blend_line_f32(void *dst, void *src, void *src_a, int w)
{
- if (!srcamul)
- return;
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- uint8_t *srca_r = srca + srca_stride * y;
- if (bytes == 2) {
- BLEND_CONST_ALPHA(uint16_t)
- } else if (bytes == 1) {
- BLEND_CONST_ALPHA(uint8_t)
- }
- }
-}
+ float *dst_f = dst;
+ float *src_f = src;
+ float *src_a_f = src_a;
-#define BLEND_SRC_ALPHA(TYPE) \
- TYPE *dst_r = dst_rp, *src_r = src_rp; \
- for (int x = 0; x < w; x++) { \
- uint32_t srcap = srca_r[x]; \
- if (CONDITIONAL && !srcap) continue; \
- dst_r[x] = (src_r[x] * srcap + dst_r[x] * (255 - srcap) + 127) / 255; \
- }
+ for (int x = 0; x < w; x++)
+ dst_f[x] = src_f[x] + dst_f[x] * (1.0f - src_a_f[x]);
+}
-// dst = src * srca + dst * (1 - srca)
-static void blend_src_alpha(void *dst, int dst_stride, void *src,
- int src_stride, uint8_t *srca, int srca_stride,
- int w, int h, int bytes)
+static void blend_slice(struct mp_draw_sub_cache *p, int rgb_y)
{
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- void *src_rp = (uint8_t *)src + src_stride * y;
- uint8_t *srca_r = srca + srca_stride * y;
- if (bytes == 2) {
- BLEND_SRC_ALPHA(uint16_t)
- } else if (bytes == 1) {
- BLEND_SRC_ALPHA(uint8_t)
+ struct mp_image *ov = p->overlay_tmp;
+ struct mp_image *ca = p->calpha_tmp;
+ struct mp_image *vid = p->video_tmp;
+
+ for (int plane = 0; plane < vid->num_planes; plane++) {
+ int xs = vid->fmt.xs[plane];
+ int ys = vid->fmt.ys[plane];
+ int h = (1 << vid->fmt.chroma_ys) - (1 << ys) + 1;
+ int cw = mp_chroma_div_up(vid->w, xs);
+ for (int y = 0; y < h; y++) {
+ p->blend_line(mp_image_pixel_ptr(vid, plane, 0, y),
+ mp_image_pixel_ptr(ov, plane, 0, y),
+ xs || ys ? mp_image_pixel_ptr(ca, 0, 0, y)
+ : mp_image_pixel_ptr(ov, ov->num_planes - 1, 0, y),
+ cw);
}
}
}
-#define BLEND_SRC_DST_MUL(TYPE, MAX) \
- TYPE *dst_r = dst_rp; \
- for (int x = 0; x < w; x++) { \
- uint16_t srcp = src_r[x] * srcmul; /* now 0..65025 */ \
- dst_r[x] = (srcp * (MAX) + dst_r[x] * (65025 - srcp) + 32512) / 65025; \
+static bool blend_overlay_with_video(struct mp_draw_sub_cache *p,
+ struct mp_image *dst)
+{
+ if (!repack_config_buffers(p->video_to_f32, 0, p->video_tmp, 0, dst, NULL))
+ return false;
+ if (!repack_config_buffers(p->video_from_f32, 0, dst, 0, p->video_tmp, NULL))
+ return false;
+
+ int xs = dst->fmt.chroma_xs;
+ int ys = dst->fmt.chroma_ys;
+
+ for (int y = 0; y < dst->h; y += p->align_y) {
+ struct slice *line = &p->slices[y * p->s_w];
+
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &line[sx];
+
+ int w = s->x1 - s->x0;
+ if (w <= 0)
+ continue;
+ int x = sx * SLICE_W + s->x0;
+
+ assert(MP_IS_ALIGNED(x, p->align_x));
+ assert(MP_IS_ALIGNED(w, p->align_x));
+ assert(x + w <= p->w);
+
+ repack_line(p->overlay_to_f32, 0, 0, x, y, w);
+ repack_line(p->video_to_f32, 0, 0, x, y, w);
+ if (p->calpha_to_f32)
+ repack_line(p->calpha_to_f32, 0, 0, x >> xs, y >> ys, w >> xs);
+
+ blend_slice(p, y);
+
+ repack_line(p->video_from_f32, x, y, 0, 0, w);
+ }
}
-// dst = src * srcmul + dst * (1 - src * srcmul)
-static void blend_src_dst_mul(void *dst, int dst_stride,
- uint8_t *src, int src_stride, uint8_t srcmul,
- int w, int h, int dst_bytes)
+ return true;
+}
+
+static bool convert_overlay_part(struct mp_draw_sub_cache *p,
+ int x0, int y0, int w, int h)
{
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- uint8_t *src_r = (uint8_t *)src + src_stride * y;
- if (dst_bytes == 2) {
- BLEND_SRC_DST_MUL(uint16_t, 65025)
- } else if (dst_bytes == 1) {
- BLEND_SRC_DST_MUL(uint8_t, 255)
- }
+ struct mp_image src = *p->rgba_overlay;
+ struct mp_image dst = *p->video_overlay;
+
+ mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
+ mp_image_crop(&dst, x0, y0, x0 + w, y0 + h);
+
+ if (mp_sws_scale(p->rgba_to_overlay, &dst, &src) < 0)
+ return false;
+
+ if (p->calpha_overlay) {
+ src = *p->alpha_overlay;
+ dst = *p->calpha_overlay;
+
+ int xs = p->video_overlay->fmt.chroma_xs;
+ int ys = p->video_overlay->fmt.chroma_ys;
+ mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
+ mp_image_crop(&dst, x0 >> xs, y0 >> ys, (x0 + w) >> xs, (y0 + h) >> ys);
+
+ if (mp_sws_scale(p->alpha_to_calpha, &dst, &src) < 0)
+ return false;
}
+
+ return true;
}
-static void unpremultiply_and_split_BGR32(struct mp_image *img,
- struct mp_image *alpha)
+static bool convert_to_video_overlay(struct mp_draw_sub_cache *p)
{
- for (int y = 0; y < img->h; ++y) {
- uint32_t *irow = (uint32_t *) &img->planes[0][img->stride[0] * y];
- uint8_t *arow = &alpha->planes[0][alpha->stride[0] * y];
- for (int x = 0; x < img->w; ++x) {
- uint32_t pval = irow[x];
- uint32_t aval = (pval >> 24);
- uint32_t rval = (pval >> 16) & 0xFF;
- uint32_t gval = (pval >> 8) & 0xFF;
- uint32_t bval = pval & 0xFF;
- // multiplied = separate * alpha / 255
- // separate = rint(multiplied * 255 / alpha)
- // = floor(multiplied * 255 / alpha + 0.5)
- // = floor((multiplied * 255 + 0.5 * alpha) / alpha)
- // = floor((multiplied * 255 + floor(0.5 * alpha)) / alpha)
- int div = (int) aval;
- int add = div / 2;
- if (aval) {
- rval = MPMIN(255, (rval * 255 + add) / div);
- gval = MPMIN(255, (gval * 255 + add) / div);
- bval = MPMIN(255, (bval * 255 + add) / div);
- irow[x] = bval + (gval << 8) + (rval << 16) + (aval << 24);
+ if (!p->video_overlay)
+ return true;
+
+ if (p->scale_in_tiles) {
+ int t_h = p->rgba_overlay->h / TILE_H;
+ for (int ty = 0; ty < t_h; ty++) {
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &p->slices[ty * TILE_H * p->s_w + sx];
+ bool pixels_set = false;
+ for (int y = 0; y < TILE_H; y++) {
+ if (s[0].x0 < s[0].x1) {
+ pixels_set = true;
+ break;
+ }
+ s += p->s_w;
+ }
+ if (!pixels_set)
+ continue;
+ if (!convert_overlay_part(p, sx * SLICE_W, ty * TILE_H,
+ SLICE_W, TILE_H))
+ return false;
}
- arow[x] = aval;
}
+ } else {
+ if (!convert_overlay_part(p, 0, 0, p->rgba_overlay->w, p->rgba_overlay->h))
+ return false;
}
+
+ return true;
}
-// dst_format merely contains the target colorspace/format information
-static void scale_sb_rgba(struct sub_bitmap *sb, const struct mp_image *dst_format,
- struct mp_image **out_sbi, struct mp_image **out_sba)
+// Mark the given rectangle of pixels as possibly non-transparent.
+// The rectangle must have been pre-clipped.
+static void mark_rect(struct mp_draw_sub_cache *p, int x0, int y0, int x1, int y1)
{
- struct mp_image sbisrc = {0};
- mp_image_setfmt(&sbisrc, IMGFMT_BGR32);
- mp_image_set_size(&sbisrc, sb->w, sb->h);
- sbisrc.planes[0] = sb->bitmap;
- sbisrc.stride[0] = sb->stride;
- struct mp_image *sbisrc2 = mp_image_alloc(IMGFMT_BGR32, sb->dw, sb->dh);
- struct mp_image *sba = mp_image_alloc(IMGFMT_Y8, sb->dw, sb->dh);
- struct mp_image *sbi = mp_image_alloc(dst_format->imgfmt, sb->dw, sb->dh);
- if (!sbisrc2 || !sba || !sbi) {
- talloc_free(sbisrc2);
- talloc_free(sba);
- talloc_free(sbi);
- return;
- }
+ x0 = MP_ALIGN_DOWN(x0, p->align_x);
+ y0 = MP_ALIGN_DOWN(y0, p->align_y);
+ x1 = MP_ALIGN_UP(x1, p->align_x);
+ y1 = MP_ALIGN_UP(y1, p->align_y);
+
+ assert(x0 >= 0 && x0 <= x1 && x1 <= p->w);
+ assert(y0 >= 0 && y0 <= y1 && y1 <= p->h);
+
+ int sx0 = x0 / SLICE_W;
+ int sx1 = x1 / SLICE_W;
- mp_image_swscale(sbisrc2, &sbisrc, SWS_BILINEAR);
- unpremultiply_and_split_BGR32(sbisrc2, sba);
+ for (int y = y0; y < y1; y++) {
+ struct slice *line = &p->slices[y * p->s_w];
- sbi->params.color = dst_format->params.color;
- mp_image_swscale(sbi, sbisrc2, SWS_BILINEAR);
+ struct slice *s0 = &line[sx0];
+ struct slice *s1 = &line[sx1];
- talloc_free(sbisrc2);
+ s0->x0 = MPMIN(s0->x0, x0 % SLICE_W);
+ s1->x1 = MPMAX(s1->x1, x1 % SLICE_W);
- *out_sbi = sbi;
- *out_sba = sba;
+ if (s0 != s1) {
+ s0->x1 = SLICE_W;
+ s1->x0 = 0;
+
+ for (int x = sx0 + 1; x < sx1; x++) {
+ struct slice *s = &line[x];
+ s->x0 = 0;
+ s->x1 = SLICE_W;
+ }
+ }
+
+ p->any_osd = true;
+ }
}
-static void draw_rgba(struct mp_draw_sub_cache *cache, struct mp_rect bb,
- struct mp_image *temp, int bits,
- struct sub_bitmaps *sbs)
+static void draw_ass_rgba(uint8_t *dst, ptrdiff_t dst_stride,
+ uint8_t *src, ptrdiff_t src_stride,
+ int w, int h, uint32_t color)
{
- struct part *part = get_cache(cache, sbs, temp);
- assert(part);
+ const unsigned int r = (color >> 24) & 0xff;
+ const unsigned int g = (color >> 16) & 0xff;
+ const unsigned int b = (color >> 8) & 0xff;
+ const unsigned int a = 0xff - (color & 0xff);
- for (int i = 0; i < sbs->num_parts; ++i) {
- struct sub_bitmap *sb = &sbs->parts[i];
+ for (int y = 0; y < h; y++) {
+ uint32_t *dstrow = (uint32_t *) dst;
+ for (int x = 0; x < w; x++) {
+ const unsigned int v = src[x];
+ unsigned int aa = a * v;
+ uint32_t dstpix = dstrow[x];
+ unsigned int dstb = dstpix & 0xFF;
+ unsigned int dstg = (dstpix >> 8) & 0xFF;
+ unsigned int dstr = (dstpix >> 16) & 0xFF;
+ unsigned int dsta = (dstpix >> 24) & 0xFF;
+ dstb = (v * b * a + dstb * (255 * 255 - aa)) / (255 * 255);
+ dstg = (v * g * a + dstg * (255 * 255 - aa)) / (255 * 255);
+ dstr = (v * r * a + dstr * (255 * 255 - aa)) / (255 * 255);
+ dsta = (aa * 255 + dsta * (255 * 255 - aa)) / (255 * 255);
+ dstrow[x] = dstb | (dstg << 8) | (dstr << 16) | (dsta << 24);
+ }
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
- if (sb->w < 1 || sb->h < 1)
- continue;
+static void render_ass(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
+{
+ assert(sb->format == SUBBITMAP_LIBASS);
- struct mp_image dst;
- int src_x, src_y;
- if (!get_sub_area(bb, temp, sb, &dst, &src_x, &src_y))
- continue;
+ for (int i = 0; i < sb->num_parts; i++) {
+ struct sub_bitmap *s = &sb->parts[i];
- struct mp_image *sbi = part->imgs[i].i;
- struct mp_image *sba = part->imgs[i].a;
+ draw_ass_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, s->x, s->y),
+ p->rgba_overlay->stride[0], s->bitmap, s->stride,
+ s->w, s->h, s->libass.color);
- if (!(sbi && sba))
- scale_sb_rgba(sb, temp, &sbi, &sba);
- // on OOM, skip drawing
- if (!(sbi && sba))
- continue;
+ mark_rect(p, s->x, s->y, s->x + s->w, s->y + s->h);
+ }
+}
- int bytes = (bits + 7) / 8;
- uint8_t *alpha_p = sba->planes[0] + src_y * sba->stride[0] + src_x;
- for (int p = 0; p < (temp->num_planes > 2 ? 3 : 1); p++) {
- void *src = sbi->planes[p] + src_y * sbi->stride[p] + src_x * bytes;
- blend_src_alpha(dst.planes[p], dst.stride[p], src, sbi->stride[p],
- alpha_p, sba->stride[0], dst.w, dst.h, bytes);
- }
- if (temp->num_planes >= 4) {
- blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
- sba->stride[0], 255, dst.w, dst.h, bytes);
+static void draw_rgba(uint8_t *dst, ptrdiff_t dst_stride,
+ uint8_t *src, ptrdiff_t src_stride, int w, int h)
+{
+ for (int y = 0; y < h; y++) {
+ uint32_t *srcrow = (uint32_t *)src;
+ uint32_t *dstrow = (uint32_t *)dst;
+ for (int x = 0; x < w; x++) {
+ uint32_t srcpix = srcrow[x];
+ uint32_t dstpix = dstrow[x];
+ unsigned int srcb = srcpix & 0xFF;
+ unsigned int srcg = (srcpix >> 8) & 0xFF;
+ unsigned int srcr = (srcpix >> 16) & 0xFF;
+ unsigned int srca = (srcpix >> 24) & 0xFF;
+ unsigned int dstb = dstpix & 0xFF;
+ unsigned int dstg = (dstpix >> 8) & 0xFF;
+ unsigned int dstr = (dstpix >> 16) & 0xFF;
+ unsigned int dsta = (dstpix >> 24) & 0xFF;
+ dstb = srcb + dstb * (255 * 255 - srca) / (255 * 255);
+ dstg = srcg + dstg * (255 * 255 - srca) / (255 * 255);
+ dstr = srcr + dstr * (255 * 255 - srca) / (255 * 255);
+ dsta = srca + dsta * (255 * 255 - srca) / (255 * 255);
+ dstrow[x] = dstb | (dstg << 8) | (dstr << 16) | (dsta << 24);
}
-
- part->imgs[i].i = talloc_steal(part, sbi);
- part->imgs[i].a = talloc_steal(part, sba);
+ dst += dst_stride;
+ src += src_stride;
}
}
-static void draw_ass(struct mp_draw_sub_cache *cache, struct mp_rect bb,
- struct mp_image *temp, int bits, struct sub_bitmaps *sbs)
+static bool render_rgba(struct mp_draw_sub_cache *p, struct part *part,
+ struct sub_bitmaps *sb)
{
- struct mp_csp_params cspar = MP_CSP_PARAMS_DEFAULTS;
- mp_csp_set_image_params(&cspar, &temp->params);
- cspar.levels_out = MP_CSP_LEVELS_PC; // RGB (libass.color)
- cspar.input_bits = bits;
- cspar.texture_bits = (bits + 7) / 8 * 8;
-
- struct mp_cmat yuv2rgb, rgb2yuv;
- bool need_conv = temp->fmt.flags & MP_IMGFLAG_YUV;
- if (need_conv) {
- mp_get_csp_matrix(&cspar, &yuv2rgb);
- mp_invert_cmat(&rgb2yuv, &yuv2rgb);
+ assert(sb->format == SUBBITMAP_RGBA);
+
+ if (part->change_id != sb->change_id) {
+ for (int n = 0; n < part->num_imgs; n++)
+ talloc_free(part->imgs[n]);
+ part->num_imgs = sb->num_parts;
+ MP_TARRAY_GROW(p, part->imgs, part->num_imgs);
+ for (int n = 0; n < part->num_imgs; n++)
+ part->imgs[n] = NULL;
+
+ part->change_id = sb->change_id;
}
- for (int i = 0; i < sbs->num_parts; ++i) {
- struct sub_bitmap *sb = &sbs->parts[i];
+ for (int i = 0; i < sb->num_parts; i++) {
+ struct sub_bitmap *s = &sb->parts[i];
+
+ // Clipping is rare but necessary.
+ int sx0 = s->x;
+ int sy0 = s->y;
+ int sx1 = s->x + s->dw;
+ int sy1 = s->y + s->dh;
+
+ int x0 = MPCLAMP(sx0, 0, p->w);
+ int y0 = MPCLAMP(sy0, 0, p->h);
+ int x1 = MPCLAMP(sx1, 0, p->w);
+ int y1 = MPCLAMP(sy1, 0, p->h);
- struct mp_image dst;
- int src_x, src_y;
- if (!get_sub_area(bb, temp, sb, &dst, &src_x, &src_y))
+ int dw = x1 - x0;
+ int dh = y1 - y0;
+ if (dw <= 0 || dh <= 0)
continue;
- int r = (sb->libass.color >> 24) & 0xFF;
- int g = (sb->libass.color >> 16) & 0xFF;
- int b = (sb->libass.color >> 8) & 0xFF;
- int a = 255 - (sb->libass.color & 0xFF);
- int color_yuv[3];
- if (need_conv) {
- int rgb[3] = {r, g, b};
- mp_map_fixp_color(&rgb2yuv, 8, rgb, cspar.texture_bits, color_yuv);
- } else {
- const int shift = (bits > 8) ? bits - 8 : 0;
- color_yuv[0] = g << shift;
- color_yuv[1] = b << shift;
- color_yuv[2] = r << shift;
+ // We clip the source instead of the scaled image, because that might
+ // avoid excessive memory usage when applying a ridiculous scale factor,
+ // even though this may stretch the result by up to 1 pixel due to integer
+ // rounding.
+ int sx = 0;
+ int sy = 0;
+ int sw = s->w;
+ int sh = s->h;
+ if (x0 != sx0 || y0 != sy0 || x1 != sx1 || y1 != sy1) {
+ double fx = s->dw / (double)s->w;
+ double fy = s->dh / (double)s->h;
+ sx = MPCLAMP((x0 - sx0) / fx, 0, s->w);
+ sy = MPCLAMP((y0 - sy0) / fy, 0, s->h);
+ sw = MPCLAMP(dw / fx, 1, s->w);
+ sh = MPCLAMP(dh / fy, 1, s->h);
}
- int bytes = (bits + 7) / 8;
- uint8_t *alpha_p = (uint8_t *)sb->bitmap + src_y * sb->stride + src_x;
- for (int p = 0; p < (temp->num_planes > 2 ? 3 : 1); p++) {
- blend_const_alpha(dst.planes[p], dst.stride[p], color_yuv[p],
- alpha_p, sb->stride, a, dst.w, dst.h, bytes);
- }
- if (temp->num_planes >= 4) {
- blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
- sb->stride, a, dst.w, dst.h, bytes);
+ assert(sx >= 0 && sw > 0 && sx + sw <= s->w);
+ assert(sy >= 0 && sh > 0 && sy + sh <= s->h);
+
+ ptrdiff_t s_stride = s->stride;
+ void *s_ptr = (char *)s->bitmap + s_stride * sy + sx * 4;
+
+ if (dw != sw || dh != sh) {
+ struct mp_image *scaled = part->imgs[i];
+
+ if (!scaled) {
+ struct mp_image src_img = {0};
+ mp_image_setfmt(&src_img, IMGFMT_BGR32);
+ mp_image_set_size(&src_img, sw, sh);
+ src_img.planes[0] = s_ptr;
+ src_img.stride[0] = s_stride;
+ src_img.params.alpha = MP_ALPHA_PREMUL;
+
+ scaled = mp_image_alloc(IMGFMT_BGR32, dw, dh);
+ if (!scaled)
+ return false;
+ part->imgs[i] = talloc_steal(p, scaled);
+ mp_image_copy_attributes(scaled, &src_img);
+
+ if (mp_sws_scale(p->sub_scale, scaled, &src_img) < 0)
+ return false;
+ }
+
+ assert(scaled->w == dw);
+ assert(scaled->h == dh);
+
+ s_stride = scaled->stride[0];
+ s_ptr = scaled->planes[0];
}
+
+ draw_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, x0, y0),
+ p->rgba_overlay->stride[0], s_ptr, s_stride, dw, dh);
+
+ mark_rect(p, x0, y0, x1, y1);
}
+
+ return true;
}
-static void get_swscale_alignment(const struct mp_image *img, int *out_xstep,
- int *out_ystep)
+static bool render_sb(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
{
- int sx = (1 << img->fmt.chroma_xs);
- int sy = (1 << img->fmt.chroma_ys);
-
- for (int p = 0; p < img->num_planes; ++p) {
- int bits = img->fmt.bpp[p];
- // the * 2 fixes problems with writing past the destination width
- while (((sx >> img->fmt.chroma_xs) * bits) % (SWS_MIN_BYTE_ALIGN * 8 * 2))
- sx *= 2;
+ struct part *part = &p->parts[sb->render_index];
+
+ switch (sb->format) {
+ case SUBBITMAP_LIBASS:
+ render_ass(p, sb);
+ return true;
+ case SUBBITMAP_RGBA:
+ return render_rgba(p, part, sb);
}
- *out_xstep = sx;
- *out_ystep = sy;
+ return false;
}
-static void align_bbox(int xstep, int ystep, struct mp_rect *rc)
+static void clear_rgba_overlay(struct mp_draw_sub_cache *p)
{
- rc->x0 = rc->x0 & ~(xstep - 1);
- rc->y0 = rc->y0 & ~(ystep - 1);
- rc->x1 = FFALIGN(rc->x1, xstep);
- rc->y1 = FFALIGN(rc->y1, ystep);
-}
+ assert(p->rgba_overlay->imgfmt == IMGFMT_BGR32);
-// Post condition, if true returned: rc is inside img
-static bool align_bbox_for_swscale(struct mp_image *img, struct mp_rect *rc)
-{
- struct mp_rect img_rect = {0, 0, img->w, img->h};
- // Get rid of negative coordinates
- if (!mp_rect_intersection(rc, &img_rect))
- return false;
- int xstep, ystep;
- get_swscale_alignment(img, &xstep, &ystep);
- align_bbox(xstep, ystep, rc);
- return mp_rect_intersection(rc, &img_rect);
-}
+ for (int y = 0; y < p->rgba_overlay->h; y++) {
+ uint32_t *px = mp_image_pixel_ptr(p->rgba_overlay, 0, 0, y);
+ struct slice *line = &p->slices[y * p->s_w];
-// Try to find best/closest YUV 444 format (or similar) for imgfmt
-static void get_closest_y444_format(int imgfmt, int *out_format, int *out_bits)
-{
- struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
- int planes = desc.flags & MP_IMGFLAG_ALPHA ? 4 : 3;
- if (desc.flags & MP_IMGFLAG_RGB) {
- // For RGB try to match the amount of bits exactly (but no less than 8, or larger than 16)
- int bits = (desc.component_bits > 8) ? desc.component_bits : 8;
- if (bits > 16)
- bits = 16;
- *out_format = mp_imgfmt_find(0, 0, planes, bits, MP_IMGFLAG_RGB_P);
- if (!mp_sws_supported_format(*out_format))
- *out_format = mp_imgfmt_find(0, 0, planes, 8, MP_IMGFLAG_RGB_P);
- } else if (desc.flags & MP_IMGFLAG_YUV_P) {
- const int bits = (desc.component_bits > 8) ? 16 : 8;
- *out_format = mp_imgfmt_find(0, 0, planes, bits, MP_IMGFLAG_YUV_P);
- } else {
- *out_format = 0;
- }
- if (!mp_sws_supported_format(*out_format))
- *out_format = IMGFMT_444P; // generic fallback
- *out_bits = mp_imgfmt_get_desc(*out_format).component_bits;
-}
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &line[sx];
-static struct part *get_cache(struct mp_draw_sub_cache *cache,
- struct sub_bitmaps *sbs, struct mp_image *format)
-{
- struct part *part = NULL;
-
- bool use_cache = sbs->format == SUBBITMAP_RGBA;
- if (use_cache) {
- part = cache->parts[sbs->render_index];
- if (part) {
- if (part->change_id != sbs->change_id
- || part->imgfmt != format->imgfmt
- || part->colorspace != format->params.color.space
- || part->levels != format->params.color.levels)
- {
- talloc_free(part);
- part = NULL;
+ if (s->x0 <= s->x1) {
+ memset(px + s->x0, 0, (s->x1 - s->x0) * 4);
+ *s = (struct slice){SLICE_W, 0};
}
+
+ px += SLICE_W;
}
- if (!part) {
- part = talloc(cache, struct part);
- *part = (struct part) {
- .change_id = sbs->change_id,
- .num_imgs = sbs->num_parts,
- .imgfmt = format->imgfmt,
- .levels = format->params.color.levels,
- .colorspace = format->params.color.space,
- };
- part->imgs = talloc_zero_array(part, struct sub_cache,
- part->num_imgs);
- }
- assert(part->num_imgs == sbs->num_parts);
- cache->parts[sbs->render_index] = part;
}
- return part;
+ p->any_osd = false;
}
-// Return area of intersection between target and sub-bitmap as cropped image
-static bool get_sub_area(struct mp_rect bb, struct mp_image *temp,
- struct sub_bitmap *sb, struct mp_image *out_area,
- int *out_src_x, int *out_src_y)
+static bool reinit(struct mp_draw_sub_cache *p, struct mp_image_params *params)
{
- // coordinates are relative to the bbox
- struct mp_rect dst = {sb->x - bb.x0, sb->y - bb.y0};
- dst.x1 = dst.x0 + sb->dw;
- dst.y1 = dst.y0 + sb->dh;
- if (!mp_rect_intersection(&dst, &(struct mp_rect){0, 0, temp->w, temp->h}))
+ talloc_free_children(p);
+ *p = (struct mp_draw_sub_cache){.params = *params};
+
+ bool need_premul = params->alpha != MP_ALPHA_PREMUL &&
+ (mp_imgfmt_get_desc(params->imgfmt).flags & MP_IMGFLAG_ALPHA);
+
+ int rflags = REPACK_CREATE_EXPAND_8BIT | REPACK_CREATE_PLANAR_F32;
+ p->blend_line = blend_line_f32;
+
+ p->video_to_f32 = mp_repack_create_planar(params->imgfmt, false, rflags);
+ talloc_steal(p, p->video_to_f32);
+ if (!p->video_to_f32)
return false;
- *out_src_x = (dst.x0 - sb->x) + bb.x0;
- *out_src_y = (dst.y0 - sb->y) + bb.y0;
- *out_area = *temp;
- mp_image_crop_rc(out_area, dst);
+ p->scale_in_tiles = SCALE_IN_TILES;
- return true;
-}
+ int vid_f32_fmt = mp_repack_get_format_dst(p->video_to_f32);
-// Convert the src image to imgfmt (which should be a 444 format)
-static struct mp_image *chroma_up(struct mp_draw_sub_cache *cache, int imgfmt,
- struct mp_image *src)
-{
- if (src->imgfmt == imgfmt)
- return src;
+ p->video_from_f32 = mp_repack_create_planar(params->imgfmt, true, rflags);
+ talloc_steal(p, p->video_from_f32);
+ if (!p->video_from_f32)
+ return false;
- if (!cache->upsample_img || cache->upsample_img->imgfmt != imgfmt ||
- cache->upsample_img->w < src->w || cache->upsample_img->h < src->h)
- {
- talloc_free(cache->upsample_img);
- cache->upsample_img = mp_image_alloc(imgfmt, src->w, src->h);
- talloc_steal(cache, cache->upsample_img);
- if (!cache->upsample_img)
- return NULL;
+ assert(mp_repack_get_format_dst(p->video_to_f32) ==
+ mp_repack_get_format_src(p->video_from_f32));
+
+ // Find a reasonable intermediate format for video_overlay. Requirements:
+ // - same subsampling
+ // - has alpha
+ // - uses video colorspace
+ // - REPACK_CREATE_PLANAR_F32 support
+ // - probably not using float (vaguely wastes memory)
+ struct mp_regular_imgfmt vfdesc = {0};
+ mp_get_regular_imgfmt(&vfdesc, mp_repack_get_format_dst(p->video_to_f32));
+ assert(vfdesc.component_type == MP_COMPONENT_TYPE_FLOAT);
+
+ int overlay_fmt = 0;
+ if (params->color.space == MP_CSP_RGB && vfdesc.num_planes >= 3) {
+ // No point in doing anything fancy.
+ overlay_fmt = IMGFMT_BGR32;
+ p->scale_in_tiles = false;
+ } else {
+ struct mp_regular_imgfmt odesc = vfdesc;
+ // Just use 8 bit as well (should be fine, may use less memory).
+ odesc.component_type = MP_COMPONENT_TYPE_UINT;
+ odesc.component_size = 1;
+ odesc.component_pad = 0;
+
+ // Ensure there's alpha.
+ if (odesc.planes[odesc.num_planes - 1].components[0] != 4) {
+ if (odesc.num_planes >= 4)
+ return false; // wat
+ odesc.planes[odesc.num_planes++] =
+ (struct mp_regular_imgfmt_plane){1, {4}};
+ }
+
+ overlay_fmt = mp_find_regular_imgfmt(&odesc);
+ p->scale_in_tiles = odesc.chroma_xs || odesc.chroma_ys;
}
+ if (!overlay_fmt)
+ return false;
- cache->upsample_temp = *cache->upsample_img;
- struct mp_image *temp = &cache->upsample_temp;