summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sub/draw_bmp.c 1055
-rw-r--r-- sub/draw_bmp.h 3
-rw-r--r-- sub/osd.c 3
-rw-r--r-- test/ref/draw_bmp.txt 218
-rw-r--r-- test/repack.c 67
-rw-r--r-- video/filter/vf_sub.c 3
6 files changed, 938 insertions, 411 deletions
diff --git a/sub/draw_bmp.c b/sub/draw_bmp.c
index ba027838ec..e1caea1e0a 100644
--- a/sub/draw_bmp.c
+++ b/sub/draw_bmp.c
@@ -21,12 +21,11 @@
#include <math.h>
#include <inttypes.h>
-#include <libswscale/swscale.h>
-
#include "common/common.h"
#include "draw_bmp.h"
#include "img_convert.h"
#include "video/mp_image.h"
+#include "video/repack.h"
#include "video/sws_utils.h"
#include "video/img_format.h"
#include "video/csputils.h"
@@ -36,517 +35,761 @@ const bool mp_draw_sub_formats[SUBBITMAP_COUNT] = {
[SUBBITMAP_RGBA] = true,
};
-struct sub_cache {
- struct mp_image *i, *a;
-};
-
struct part {
int change_id;
- int imgfmt;
- enum mp_csp colorspace;
- enum mp_csp_levels levels;
+ // Sub-bitmaps scaled to final sizes.
int num_imgs;
- struct sub_cache *imgs;
+ struct mp_image **imgs;
+};
+
+// Must be a power of 2. Height is 1, but mark_rect() effectively operates on
+// multiples of chroma sized macro-pixels. (E.g. 4:2:0 -> every second line is
+// the same as the previous one, and x0%2==x1%2==0.)
+#define SLICE_W 256u
+
+// Whether to scale in tiles. Faster, but can't use correct chroma position.
+// Should be a runtime option. SLICE_W is used as tile width. The tile size
+// should probably be small, but not tiny: tiles that are too small or too big
+// both cause overhead when scaling.
+#define SCALE_IN_TILES 1
+#define TILE_H 4u
+
+struct slice {
+ uint16_t x0, x1;
};
struct mp_draw_sub_cache
{
- struct part *parts[MAX_OSD_PARTS];
- struct mp_image *upsample_img;
- struct mp_image upsample_temp;
-};
+ // Possibly cached parts. Also implies what's in the video_overlay.
+ struct part parts[MAX_OSD_PARTS];
+ int64_t change_id;
+ struct mp_image_params params; // target image params
-static struct part *get_cache(struct mp_draw_sub_cache *cache,
- struct sub_bitmaps *sbs, struct mp_image *format);
-static bool get_sub_area(struct mp_rect bb, struct mp_image *temp,
- struct sub_bitmap *sb, struct mp_image *out_area,
- int *out_src_x, int *out_src_y);
+ int w, h; // like params.w/h, but rounded up to chroma
+ unsigned align_x, align_y; // alignment for all video pixels
-#define CONDITIONAL 1
+ struct mp_image *rgba_overlay; // all OSD in RGBA
+ struct mp_image *video_overlay; // rgba_overlay converted to video colorspace
+ struct mp_image *alpha_overlay; // alpha plane ref. to video_overlay
+ struct mp_image *calpha_overlay; // alpha_overlay scaled to chroma plane size
-#define BLEND_CONST_ALPHA(TYPE) \
- TYPE *dst_r = dst_rp; \
- for (int x = 0; x < w; x++) { \
- uint32_t srcap = srca_r[x]; \
- if (CONDITIONAL && !srcap) continue; \
- srcap *= srcamul; /* now 0..65025 */ \
- dst_r[x] = (srcp * srcap + dst_r[x] * (65025 - srcap) + 32512) / 65025; \
- }
+ unsigned s_w; // number of slices per line
+ struct slice *slices; // slices[y * s_w + x / SLICE_W]
+ bool any_osd;
+
+ struct mp_sws_context *rgba_to_overlay; // scaler for rgba -> video csp.
+ struct mp_sws_context *alpha_to_calpha; // scaler for overlay -> calpha
+ bool scale_in_tiles;
-// dst = srcp * (srca * srcamul) + dst * (1 - (srca * srcamul))
-static void blend_const_alpha(void *dst, int dst_stride, int srcp,
- uint8_t *srca, int srca_stride, uint8_t srcamul,
- int w, int h, int bytes)
+ struct mp_sws_context *sub_scale; // scaler for SUBBITMAP_RGBA
+
+ struct mp_repack *overlay_to_f32; // convert video_overlay to float
+ struct mp_image *overlay_tmp; // slice in float32
+
+ struct mp_repack *calpha_to_f32; // convert calpha_overlay to float
+ struct mp_image *calpha_tmp; // slice in float32
+
+ struct mp_repack *video_to_f32; // convert video to float
+ struct mp_repack *video_from_f32; // convert float back to video
+ struct mp_image *video_tmp; // slice in float32
+
+ struct mp_sws_context *premul; // video -> premultiplied video
+ struct mp_sws_context *unpremul; // reverse
+ struct mp_image *premul_tmp;
+
+ // Function that works on the _f32 data.
+ void (*blend_line)(void *dst, void *src, void *src_a, int w);
+};
+
+static void blend_line_f32(void *dst, void *src, void *src_a, int w)
{
- if (!srcamul)
- return;
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- uint8_t *srca_r = srca + srca_stride * y;
- if (bytes == 2) {
- BLEND_CONST_ALPHA(uint16_t)
- } else if (bytes == 1) {
- BLEND_CONST_ALPHA(uint8_t)
- }
- }
-}
+ float *dst_f = dst;
+ float *src_f = src;
+ float *src_a_f = src_a;
-#define BLEND_SRC_ALPHA(TYPE) \
- TYPE *dst_r = dst_rp, *src_r = src_rp; \
- for (int x = 0; x < w; x++) { \
- uint32_t srcap = srca_r[x]; \
- if (CONDITIONAL && !srcap) continue; \
- dst_r[x] = (src_r[x] * srcap + dst_r[x] * (255 - srcap) + 127) / 255; \
- }
+ for (int x = 0; x < w; x++)
+ dst_f[x] = src_f[x] + dst_f[x] * (1.0f - src_a_f[x]);
+}
-// dst = src * srca + dst * (1 - srca)
-static void blend_src_alpha(void *dst, int dst_stride, void *src,
- int src_stride, uint8_t *srca, int srca_stride,
- int w, int h, int bytes)
+static void blend_slice(struct mp_draw_sub_cache *p, int rgb_y)
{
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- void *src_rp = (uint8_t *)src + src_stride * y;
- uint8_t *srca_r = srca + srca_stride * y;
- if (bytes == 2) {
- BLEND_SRC_ALPHA(uint16_t)
- } else if (bytes == 1) {
- BLEND_SRC_ALPHA(uint8_t)
+ struct mp_image *ov = p->overlay_tmp;
+ struct mp_image *ca = p->calpha_tmp;
+ struct mp_image *vid = p->video_tmp;
+
+ for (int plane = 0; plane < vid->num_planes; plane++) {
+ int xs = vid->fmt.xs[plane];
+ int ys = vid->fmt.ys[plane];
+ int h = (1 << vid->fmt.chroma_ys) - (1 << ys) + 1;
+ int cw = mp_chroma_div_up(vid->w, xs);
+ for (int y = 0; y < h; y++) {
+ p->blend_line(mp_image_pixel_ptr(vid, plane, 0, y),
+ mp_image_pixel_ptr(ov, plane, 0, y),
+ xs || ys ? mp_image_pixel_ptr(ca, 0, 0, y)
+ : mp_image_pixel_ptr(ov, ov->num_planes - 1, 0, y),
+ cw);
}
}
}
-#define BLEND_SRC_DST_MUL(TYPE, MAX) \
- TYPE *dst_r = dst_rp; \
- for (int x = 0; x < w; x++) { \
- uint16_t srcp = src_r[x] * srcmul; /* now 0..65025 */ \
- dst_r[x] = (srcp * (MAX) + dst_r[x] * (65025 - srcp) + 32512) / 65025; \
+static bool blend_overlay_with_video(struct mp_draw_sub_cache *p,
+ struct mp_image *dst)
+{
+ if (!repack_config_buffers(p->video_to_f32, 0, p->video_tmp, 0, dst, NULL))
+ return false;
+ if (!repack_config_buffers(p->video_from_f32, 0, dst, 0, p->video_tmp, NULL))
+ return false;
+
+ int xs = dst->fmt.chroma_xs;
+ int ys = dst->fmt.chroma_ys;
+
+ for (int y = 0; y < dst->h; y += p->align_y) {
+ struct slice *line = &p->slices[y * p->s_w];
+
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &line[sx];
+
+ int w = s->x1 - s->x0;
+ if (w <= 0)
+ continue;
+ int x = sx * SLICE_W + s->x0;
+
+ assert(MP_IS_ALIGNED(x, p->align_x));
+ assert(MP_IS_ALIGNED(w, p->align_x));
+ assert(x + w <= p->w);
+
+ repack_line(p->overlay_to_f32, 0, 0, x, y, w);
+ repack_line(p->video_to_f32, 0, 0, x, y, w);
+ if (p->calpha_to_f32)
+ repack_line(p->calpha_to_f32, 0, 0, x >> xs, y >> ys, w >> xs);
+
+ blend_slice(p, y);
+
+ repack_line(p->video_from_f32, x, y, 0, 0, w);
+ }
}
-// dst = src * srcmul + dst * (1 - src * srcmul)
-static void blend_src_dst_mul(void *dst, int dst_stride,
- uint8_t *src, int src_stride, uint8_t srcmul,
- int w, int h, int dst_bytes)
+ return true;
+}
+
+static bool convert_overlay_part(struct mp_draw_sub_cache *p,
+ int x0, int y0, int w, int h)
{
- for (int y = 0; y < h; y++) {
- void *dst_rp = (uint8_t *)dst + dst_stride * y;
- uint8_t *src_r = (uint8_t *)src + src_stride * y;
- if (dst_bytes == 2) {
- BLEND_SRC_DST_MUL(uint16_t, 65025)
- } else if (dst_bytes == 1) {
- BLEND_SRC_DST_MUL(uint8_t, 255)
- }
+ struct mp_image src = *p->rgba_overlay;
+ struct mp_image dst = *p->video_overlay;
+
+ mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
+ mp_image_crop(&dst, x0, y0, x0 + w, y0 + h);
+
+ if (mp_sws_scale(p->rgba_to_overlay, &dst, &src) < 0)
+ return false;
+
+ if (p->calpha_overlay) {
+ src = *p->alpha_overlay;
+ dst = *p->calpha_overlay;
+
+ int xs = p->video_overlay->fmt.chroma_xs;
+ int ys = p->video_overlay->fmt.chroma_ys;
+ mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
+ mp_image_crop(&dst, x0 >> xs, y0 >> ys, (x0 + w) >> xs, (y0 + h) >> ys);
+
+ if (mp_sws_scale(p->alpha_to_calpha, &dst, &src) < 0)
+ return false;
}
+
+ return true;
}
-static void unpremultiply_and_split_BGR32(struct mp_image *img,
- struct mp_image *alpha)
+static bool convert_to_video_overlay(struct mp_draw_sub_cache *p)
{
- for (int y = 0; y < img->h; ++y) {
- uint32_t *irow = (uint32_t *) &img->planes[0][img->stride[0] * y];
- uint8_t *arow = &alpha->planes[0][alpha->stride[0] * y];
- for (int x = 0; x < img->w; ++x) {
- uint32_t pval = irow[x];
- uint32_t aval = (pval >> 24);
- uint32_t rval = (pval >> 16) & 0xFF;
- uint32_t gval = (pval >> 8) & 0xFF;
- uint32_t bval = pval & 0xFF;
- // multiplied = separate * alpha / 255
- // separate = rint(multiplied * 255 / alpha)
- // = floor(multiplied * 255 / alpha + 0.5)
- // = floor((multiplied * 255 + 0.5 * alpha) / alpha)
- // = floor((multiplied * 255 + floor(0.5 * alpha)) / alpha)
- int div = (int) aval;
- int add = div / 2;
- if (aval) {
- rval = MPMIN(255, (rval * 255 + add) / div);
- gval = MPMIN(255, (gval * 255 + add) / div);
- bval = MPMIN(255, (bval * 255 + add) / div);
- irow[x] = bval + (gval << 8) + (rval << 16) + (aval << 24);
+ if (!p->video_overlay)
+ return true;
+
+ if (p->scale_in_tiles) {
+ int t_h = p->rgba_overlay->h / TILE_H;
+ for (int ty = 0; ty < t_h; ty++) {
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &p->slices[ty * TILE_H * p->s_w + sx];
+ bool pixels_set = false;
+ for (int y = 0; y < TILE_H; y++) {
+ if (s[0].x0 < s[0].x1) {
+ pixels_set = true;
+ break;
+ }
+ s += p->s_w;
+ }
+ if (!pixels_set)
+ continue;
+ if (!convert_overlay_part(p, sx * SLICE_W, ty * TILE_H,
+ SLICE_W, TILE_H))
+ return false;
}
- arow[x] = aval;
}
+ } else {
+ if (!convert_overlay_part(p, 0, 0, p->rgba_overlay->w, p->rgba_overlay->h))
+ return false;
}
+
+ return true;
}
-// dst_format merely contains the target colorspace/format information
-static void scale_sb_rgba(struct sub_bitmap *sb, const struct mp_image *dst_format,
- struct mp_image **out_sbi, struct mp_image **out_sba)
+// Mark the given rectangle of pixels as possibly non-transparent.
+// The rectangle must have been pre-clipped.
+static void mark_rect(struct mp_draw_sub_cache *p, int x0, int y0, int x1, int y1)
{
- struct mp_image sbisrc = {0};
- mp_image_setfmt(&sbisrc, IMGFMT_BGR32);
- mp_image_set_size(&sbisrc, sb->w, sb->h);
- sbisrc.planes[0] = sb->bitmap;
- sbisrc.stride[0] = sb->stride;
- struct mp_image *sbisrc2 = mp_image_alloc(IMGFMT_BGR32, sb->dw, sb->dh);
- struct mp_image *sba = mp_image_alloc(IMGFMT_Y8, sb->dw, sb->dh);
- struct mp_image *sbi = mp_image_alloc(dst_format->imgfmt, sb->dw, sb->dh);
- if (!sbisrc2 || !sba || !sbi) {
- talloc_free(sbisrc2);
- talloc_free(sba);
- talloc_free(sbi);
- return;
- }
+ x0 = MP_ALIGN_DOWN(x0, p->align_x);
+ y0 = MP_ALIGN_DOWN(y0, p->align_y);
+ x1 = MP_ALIGN_UP(x1, p->align_x);
+ y1 = MP_ALIGN_UP(y1, p->align_y);
+
+ assert(x0 >= 0 && x0 <= x1 && x1 <= p->w);
+ assert(y0 >= 0 && y0 <= y1 && y1 <= p->h);
+
+ int sx0 = x0 / SLICE_W;
+ int sx1 = x1 / SLICE_W;
- mp_image_swscale(sbisrc2, &sbisrc, SWS_BILINEAR);
- unpremultiply_and_split_BGR32(sbisrc2, sba);
+ for (int y = y0; y < y1; y++) {
+ struct slice *line = &p->slices[y * p->s_w];
- sbi->params.color = dst_format->params.color;
- mp_image_swscale(sbi, sbisrc2, SWS_BILINEAR);
+ struct slice *s0 = &line[sx0];
+ struct slice *s1 = &line[sx1];
- talloc_free(sbisrc2);
+ s0->x0 = MPMIN(s0->x0, x0 % SLICE_W);
+ s1->x1 = MPMAX(s1->x1, x1 % SLICE_W);
- *out_sbi = sbi;
- *out_sba = sba;
+ if (s0 != s1) {
+ s0->x1 = SLICE_W;
+ s1->x0 = 0;
+
+ for (int x = sx0 + 1; x < sx1; x++) {
+ struct slice *s = &line[x];
+ s->x0 = 0;
+ s->x1 = SLICE_W;
+ }
+ }
+
+ p->any_osd = true;
+ }
}
-static void draw_rgba(struct mp_draw_sub_cache *cache, struct mp_rect bb,
- struct mp_image *temp, int bits,
- struct sub_bitmaps *sbs)
+static void draw_ass_rgba(uint8_t *dst, ptrdiff_t dst_stride,
+ uint8_t *src, ptrdiff_t src_stride,
+ int w, int h, uint32_t color)
{
- struct part *part = get_cache(cache, sbs, temp);
- assert(part);
+ const unsigned int r = (color >> 24) & 0xff;
+ const unsigned int g = (color >> 16) & 0xff;
+ const unsigned int b = (color >> 8) & 0xff;
+ const unsigned int a = 0xff - (color & 0xff);
- for (int i = 0; i < sbs->num_parts; ++i) {
- struct sub_bitmap *sb = &sbs->parts[i];
+ for (int y = 0; y < h; y++) {
+ uint32_t *dstrow = (uint32_t *) dst;
+ for (int x = 0; x < w; x++) {
+ const unsigned int v = src[x];
+ unsigned int aa = a * v;
+ uint32_t dstpix = dstrow[x];
+ unsigned int dstb = dstpix & 0xFF;
+ unsigned int dstg = (dstpix >> 8) & 0xFF;
+ unsigned int dstr = (dstpix >> 16) & 0xFF;
+ unsigned int dsta = (dstpix >> 24) & 0xFF;
+ dstb = (v * b * a + dstb * (255 * 255 - aa)) / (255 * 255);
+ dstg = (v * g * a + dstg * (255 * 255 - aa)) / (255 * 255);
+ dstr = (v * r * a + dstr * (255 * 255 - aa)) / (255 * 255);
+ dsta = (aa * 255 + dsta * (255 * 255 - aa)) / (255 * 255);
+ dstrow[x] = dstb | (dstg << 8) | (dstr << 16) | (dsta << 24);
+ }
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
- if (sb->w < 1 || sb->h < 1)
- continue;
+static void render_ass(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
+{
+ assert(sb->format == SUBBITMAP_LIBASS);
- struct mp_image dst;
- int src_x, src_y;
- if (!get_sub_area(bb, temp, sb, &dst, &src_x, &src_y))
- continue;
+ for (int i = 0; i < sb->num_parts; i++) {
+ struct sub_bitmap *s = &sb->parts[i];
- struct mp_image *sbi = part->imgs[i].i;
- struct mp_image *sba = part->imgs[i].a;
+ draw_ass_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, s->x, s->y),
+ p->rgba_overlay->stride[0], s->bitmap, s->stride,
+ s->w, s->h, s->libass.color);
- if (!(sbi && sba))
- scale_sb_rgba(sb, temp, &sbi, &sba);
- // on OOM, skip drawing
- if (!(sbi && sba))
- continue;
+ mark_rect(p, s->x, s->y, s->x + s->w, s->y + s->h);
+ }
+}
- int bytes = (bits + 7) / 8;
- uint8_t *alpha_p = sba->planes[0] + src_y * sba->stride[0] + src_x;
- for (int p = 0; p < (temp->num_planes > 2 ? 3 : 1); p++) {
- void *src = sbi->planes[p] + src_y * sbi->stride[p] + src_x * bytes;
- blend_src_alpha(dst.planes[p], dst.stride[p], src, sbi->stride[p],
- alpha_p, sba->stride[0], dst.w, dst.h, bytes);
- }
- if (temp->num_planes >= 4) {
- blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
- sba->stride[0], 255, dst.w, dst.h, bytes);
+static void draw_rgba(uint8_t *dst, ptrdiff_t dst_stride,
+ uint8_t *src, ptrdiff_t src_stride, int w, int h)
+{
+ for (int y = 0; y < h; y++) {
+ uint32_t *srcrow = (uint32_t *)src;
+ uint32_t *dstrow = (uint32_t *)dst;
+ for (int x = 0; x < w; x++) {
+ uint32_t srcpix = srcrow[x];
+ uint32_t dstpix = dstrow[x];
+ unsigned int srcb = srcpix & 0xFF;
+ unsigned int srcg = (srcpix >> 8) & 0xFF;
+ unsigned int srcr = (srcpix >> 16) & 0xFF;
+ unsigned int srca = (srcpix >> 24) & 0xFF;
+ unsigned int dstb = dstpix & 0xFF;
+ unsigned int dstg = (dstpix >> 8) & 0xFF;
+ unsigned int dstr = (dstpix >> 16) & 0xFF;
+ unsigned int dsta = (dstpix >> 24) & 0xFF;
+ dstb = srcb + dstb * (255 * 255 - srca) / (255 * 255);
+ dstg = srcg + dstg * (255 * 255 - srca) / (255 * 255);
+ dstr = srcr + dstr * (255 * 255 - srca) / (255 * 255);
+ dsta = srca + dsta * (255 * 255 - srca) / (255 * 255);
+ dstrow[x] = dstb | (dstg << 8) | (dstr << 16) | (dsta << 24);
}
-
- part->imgs[i].i = talloc_steal(part, sbi);
- part->imgs[i].a = talloc_steal(part, sba);
+ dst += dst_stride;
+ src += src_stride;
}
}
-static void draw_ass(struct mp_draw_sub_cache *cache, struct mp_rect bb,
- struct mp_image *temp, int bits, struct sub_bitmaps *sbs)
+static bool render_rgba(struct mp_draw_sub_cache *p, struct part *part,
+ struct sub_bitmaps *sb)
{
- struct mp_csp_params cspar = MP_CSP_PARAMS_DEFAULTS;
- mp_csp_set_image_params(&cspar, &temp->params);
- cspar.levels_out = MP_CSP_LEVELS_PC; // RGB (libass.color)
- cspar.input_bits = bits;
- cspar.texture_bits = (bits + 7) / 8 * 8;
-
- struct mp_cmat yuv2rgb, rgb2yuv;
- bool need_conv = temp->fmt.flags & MP_IMGFLAG_YUV;
- if (need_conv) {
- mp_get_csp_matrix(&cspar, &yuv2rgb);
- mp_invert_cmat(&rgb2yuv, &yuv2rgb);
+ assert(sb->format == SUBBITMAP_RGBA);
+
+ if (part->change_id != sb->change_id) {
+ for (int n = 0; n < part->num_imgs; n++)
+ talloc_free(part->imgs[n]);
+ part->num_imgs = sb->num_parts;
+ MP_TARRAY_GROW(p, part->imgs, part->num_imgs);
+ for (int n = 0; n < part->num_imgs; n++)
+ part->imgs[n] = NULL;
+
+ part->change_id = sb->change_id;
}
- for (int i = 0; i < sbs->num_parts; ++i) {
- struct sub_bitmap *sb = &sbs->parts[i];
+ for (int i = 0; i < sb->num_parts; i++) {
+ struct sub_bitmap *s = &sb->parts[i];
+
+ // Clipping is rare but necessary.
+ int sx0 = s->x;
+ int sy0 = s->y;
+ int sx1 = s->x + s->dw;
+ int sy1 = s->y + s->dh;
+
+ int x0 = MPCLAMP(sx0, 0, p->w);
+ int y0 = MPCLAMP(sy0, 0, p->h);
+ int x1 = MPCLAMP(sx1, 0, p->w);
+ int y1 = MPCLAMP(sy1, 0, p->h);
- struct mp_image dst;
- int src_x, src_y;
- if (!get_sub_area(bb, temp, sb, &dst, &src_x, &src_y))
+ int dw = x1 - x0;
+ int dh = y1 - y0;
+ if (dw <= 0 || dh <= 0)
continue;
- int r = (sb->libass.color >> 24) & 0xFF;
- int g = (sb->libass.color >> 16) & 0xFF;
- int b = (sb->libass.color >> 8) & 0xFF;
- int a = 255 - (sb->libass.color & 0xFF);
- int color_yuv[3];
- if (need_conv) {
- int rgb[3] = {r, g, b};
- mp_map_fixp_color(&rgb2yuv, 8, rgb, cspar.texture_bits, color_yuv);
- } else {
- const int shift = (bits > 8) ? bits - 8 : 0;
- color_yuv[0] = g << shift;
- color_yuv[1] = b << shift;
- color_yuv[2] = r << shift;
+ // We clip the source instead of the scaled image, because that might
+ // avoid excessive memory usage when applying a ridiculous scale factor,
+ // even if that stretches it by up to 1 pixel due to integer rounding.
+ int sx = 0;
+ int sy = 0;
+ int sw = s->w;
+ int sh = s->h;
+ if (x0 != sx0 || y0 != sy0 || x1 != sx1 || y1 != sy1) {
+ double fx = s->dw / (double)s->w;
+ double fy = s->dh / (double)s->h;
+ sx = MPCLAMP((x0 - sx0) / fx, 0, s->w);
+ sy = MPCLAMP((y0 - sy0) / fy, 0, s->h);
+ sw = MPCLAMP(dw / fx, 1, s->w);
+ sh = MPCLAMP(dh / fy, 1, s->h);
}
- int bytes = (bits + 7) / 8;
- uint8_t *alpha_p = (uint8_t *)sb->bitmap + src_y * sb->stride + src_x;
- for (int p = 0; p < (temp->num_planes > 2 ? 3 : 1); p++) {
- blend_const_alpha(dst.planes[p], dst.stride[p], color_yuv[p],
- alpha_p, sb->stride, a, dst.w, dst.h, bytes);
- }
- if (temp->num_planes >= 4) {
- blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
- sb->stride, a, dst.w, dst.h, bytes);
+ assert(sx >= 0 && sw > 0 && sx + sw <= s->w);
+ assert(sy >= 0 && sh > 0 && sy + sh <= s->h);
+
+ ptrdiff_t s_stride = s->stride;
+ void *s_ptr = (char *)s->bitmap + s_stride * sy + sx * 4;
+
+ if (dw != sw || dh != sh) {
+ struct mp_image *scaled = part->imgs[i];
+
+ if (!scaled) {
+ struct mp_image src_img = {0};
+ mp_image_setfmt(&src_img, IMGFMT_BGR32);
+ mp_image_set_size(&src_img, sw, sh);
+ src_img.planes[0] = s_ptr;
+ src_img.stride[0] = s_stride;
+ src_img.params.alpha = MP_ALPHA_PREMUL;
+
+ scaled = mp_image_alloc(IMGFMT_BGR32, dw, dh);
+ if (!scaled)
+ return false;
+ part->imgs[i] = talloc_steal(p, scaled);
+ mp_image_copy_attributes(scaled, &src_img);
+
+ if (mp_sws_scale(p->sub_scale, scaled, &src_img) < 0)
+ return false;
+ }
+
+ assert(scaled->w == dw);
+ assert(scaled->h == dh);
+
+ s_stride = scaled->stride[0];
+ s_ptr = scaled->planes[0];
}
+
+ draw_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, x0, y0),
+ p->rgba_overlay->stride[0], s_ptr, s_stride, dw, dh);
+
+ mark_rect(p, x0, y0, x1, y1);
}
+
+ return true;
}
-static void get_swscale_alignment(const struct mp_image *img, int *out_xstep,
- int *out_ystep)
+static bool render_sb(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
{
- int sx = (1 << img->fmt.chroma_xs);
- int sy = (1 << img->fmt.chroma_ys);
-
- for (int p = 0; p < img->num_planes; ++p) {
- int bits = img->fmt.bpp[p];
- // the * 2 fixes problems with writing past the destination width
- while (((sx >> img->fmt.chroma_xs) * bits) % (SWS_MIN_BYTE_ALIGN * 8 * 2))
- sx *= 2;
+ struct part *part = &p->parts[sb->render_index];
+
+ switch (sb->format) {
+ case SUBBITMAP_LIBASS:
+ render_ass(p, sb);
+ return true;
+ case SUBBITMAP_RGBA:
+ return render_rgba(p, part, sb);
}
- *out_xstep = sx;
- *out_ystep = sy;
+ return false;
}
-static void align_bbox(int xstep, int ystep, struct mp_rect *rc)
+static void clear_rgba_overlay(struct mp_draw_sub_cache *p)
{
- rc->x0 = rc->x0 & ~(xstep - 1);
- rc->y0 = rc->y0 & ~(ystep - 1);
- rc->x1 = FFALIGN(rc->x1, xstep);
- rc->y1 = FFALIGN(rc->y1, ystep);
-}
+ assert(p->rgba_overlay->imgfmt == IMGFMT_BGR32);
-// Post condition, if true returned: rc is inside img
-static bool align_bbox_for_swscale(struct mp_image *img, struct mp_rect *rc)
-{
- struct mp_rect img_rect = {0, 0, img->w, img->h};
- // Get rid of negative coordinates
- if (!mp_rect_intersection(rc, &img_rect))
- return false;
- int xstep, ystep;
- get_swscale_alignment(img, &xstep, &ystep);
- align_bbox(xstep, ystep, rc);
- return mp_rect_intersection(rc, &img_rect);
-}
+ for (int y = 0; y < p->rgba_overlay->h; y++) {
+ uint32_t *px = mp_image_pixel_ptr(p->rgba_overlay, 0, 0, y);
+ struct slice *line = &p->slices[y * p->s_w];
-// Try to find best/closest YUV 444 format (or similar) for imgfmt
-static void get_closest_y444_format(int imgfmt, int *out_format, int *out_bits)
-{
- struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
- int planes = desc.flags & MP_IMGFLAG_ALPHA ? 4 : 3;
- if (desc.flags & MP_IMGFLAG_RGB) {
- // For RGB try to match the amount of bits exactly (but no less than 8, or larger than 16)
- int bits = (desc.component_bits > 8) ? desc.component_bits : 8;
- if (bits > 16)
- bits = 16;
- *out_format = mp_imgfmt_find(0, 0, planes, bits, MP_IMGFLAG_RGB_P);
- if (!mp_sws_supported_format(*out_format))
- *out_format = mp_imgfmt_find(0, 0, planes, 8, MP_IMGFLAG_RGB_P);
- } else if (desc.flags & MP_IMGFLAG_YUV_P) {
- const int bits = (desc.component_bits > 8) ? 16 : 8;
- *out_format = mp_imgfmt_find(0, 0, planes, bits, MP_IMGFLAG_YUV_P);
- } else {
- *out_format = 0;
- }
- if (!mp_sws_supported_format(*out_format))
- *out_format = IMGFMT_444P; // generic fallback
- *out_bits = mp_imgfmt_get_desc(*out_format).component_bits;
-}
+ for (int sx = 0; sx < p->s_w; sx++) {
+ struct slice *s = &line[sx];
-static struct part *get_cache(struct mp_draw_sub_cache *cache,
- struct sub_bitmaps *sbs, struct mp_image *format)
-{
- struct part *part = NULL;
-
- bool use_cache = sbs->format == SUBBITMAP_RGBA;
- if (use_cache) {
- part = cache->parts[sbs->render_index];
- if (part) {
- if (part->change_id != sbs->change_id
- || part->imgfmt != format->imgfmt
- || part->colorspace != format->params.color.space
- || part->levels != format->params.color.levels)
- {
- talloc_free(part);
- part = NULL;
+ if (s->x0 <= s->x1) {
+ memset(px + s->x0, 0, (s->x1 - s->x0) * 4);
+ *s = (struct slice){SLICE_W, 0};
}
+
+ px += SLICE_W;
}
- if (!part) {
- part = talloc(cache, struct part);
- *part = (struct part) {
- .change_id = sbs->change_id,
- .num_imgs = sbs->num_parts,
- .imgfmt = format->imgfmt,
- .levels = format->params.color.levels,
- .colorspace = format->params.color.space,
- };
- part->imgs = talloc_zero_array(part, struct sub_cache,
- part->num_imgs);
- }
- assert(part->num_imgs == sbs->num_parts);
- cache->parts[sbs->render_index] = part;
}
- return part;
+ p->any_osd = false;
}
-// Return area of intersection between target and sub-bitmap as cropped image
-static bool get_sub_area(struct mp_rect bb, struct mp_image *temp,
- struct sub_bitmap *sb, struct mp_image *out_area,
- int *out_src_x, int *out_src_y)
+static bool reinit(struct mp_draw_sub_cache *p, struct mp_image_params *params)
{
- // coordinates are relative to the bbox
- struct mp_rect dst = {sb->x - bb.x0, sb->y - bb.y0};
- dst.x1 = dst.x0 + sb->dw;
- dst.y1 = dst.y0 + sb->dh;
- if (!mp_rect_intersection(&dst, &(struct mp_rect){0, 0, temp->w, temp->h}))
+ talloc_free_children(p);
+ *p = (struct mp_draw_sub_cache){.params = *params};
+
+ bool need_premul = params->alpha != MP_ALPHA_PREMUL &&
+ (mp_imgfmt_get_desc(params->imgfmt).flags & MP_IMGFLAG_ALPHA);
+
+ int rflags = REPACK_CREATE_EXPAND_8BIT | REPACK_CREATE_PLANAR_F32;
+ p->blend_line = blend_line_f32;
+
+ p->video_to_f32 = mp_repack_create_planar(params->imgfmt, false, rflags);
+ talloc_steal(p, p->video_to_f32);
+ if (!p->video_to_f32)
return false;
- *out_src_x = (dst.x0 - sb->x) + bb.x0;
- *out_src_y = (dst.y0 - sb->y) + bb.y0;
- *out_area = *temp;
- mp_image_crop_rc(out_area, dst);
+ p->scale_in_tiles = SCALE_IN_TILES;
- return true;
-}
+ int vid_f32_fmt = mp_repack_get_format_dst(p->video_to_f32);
-// Convert the src image to imgfmt (which should be a 444 format)
-static struct mp_image *chroma_up(struct mp_draw_sub_cache *cache, int imgfmt,
- struct mp_image *src)
-{
- if (src->imgfmt == imgfmt)
- return src;
+ p->video_from_f32 = mp_repack_create_planar(params->imgfmt, true, rflags);
+ talloc_steal(p, p->video_from_f32);
+ if (!p->video_from_f32)
+ return false;
- if (!cache->upsample_img || cache->upsample_img->imgfmt != imgfmt ||
- cache->upsample_img->w < src->w || cache->upsample_img->h < src->h)
- {
- talloc_free(cache->upsample_img);
- cache->upsample_img = mp_image_alloc(imgfmt, src->w, src->h);
- talloc_steal(cache, cache->upsample_img);
- if (!cache->upsample_img)
- return NULL;
+ assert(mp_repack_get_format_dst(p->video_to_f32) ==
+ mp_repack_get_format_src(p->video_from_f32));
+
+ // Find a reasonable intermediate format for video_overlay. Requirements:
+ // - same subsampling
+ // - has alpha
+ // - uses video colorspace
+ // - REPACK_CREATE_PLANAR_F32 support
+ // - probably not using float (vaguely wastes memory)
+ struct mp_regular_imgfmt vfdesc = {0};
+ mp_get_regular_imgfmt(&vfdesc, mp_repack_get_format_dst(p->video_to_f32));
+ assert(vfdesc.component_type == MP_COMPONENT_TYPE_FLOAT);
+
+ int overlay_fmt = 0;
+ if (params->color.space == MP_CSP_RGB && vfdesc.num_planes >= 3) {
+ // No point in doing anything fancy.
+ overlay_fmt = IMGFMT_BGR32;
+ p->scale_in_tiles = false;
+ } else {
+ struct mp_regular_imgfmt odesc = vfdesc;
+ // Just use 8 bit as well (should be fine, may use less memory).
+ odesc.component_type = MP_COMPONENT_TYPE_UINT;
+ odesc.component_size = 1;
+ odesc.component_pad = 0;
+
+ // Ensure there's alpha.
+ if (odesc.planes[odesc.num_planes - 1].components[0] != 4) {
+ if (odesc.num_planes >= 4)
+ return false; // already 4 planes and the last one is not alpha: give up
+ odesc.planes[odesc.num_planes++] =
+ (struct mp_regular_imgfmt_plane){1, {4}};
+ }
+
+ overlay_fmt = mp_find_regular_imgfmt(&odesc);
+ p->scale_in_tiles = odesc.chroma_xs || odesc.chroma_ys;
}
+ if (!overlay_fmt)
+ return false;
- cache->upsample_temp = *cache->upsample_img;
- struct mp_image *temp = &cache->upsample_temp;
- mp_image_set_size(temp, src->w, src->h);
-
- // The temp image is always YUV, but src not necessarily.
- // Reduce amount of conversions in YUV case (upsampling/shifting only)
- if (src->fmt.flags & MP_IMGFLAG_YUV)
- temp->params.color = src->params.color;
-
- if (src->imgfmt == IMGFMT_420P) {
- assert(imgfmt == IMGFMT_444P);
- // Faster upsampling: keep Y plane, upsample chroma planes only
- // The whole point is not having swscale copy the Y plane
- struct mp_image t_dst = *temp;
- mp_image_setfmt(&t_dst, IMGFMT_Y8);
- mp_image_set_size(&t_dst, temp->w, temp->h);
- struct mp_image t_src = t_dst;
- mp_image_set_size(&t_src, src->w >> 1, src->h >> 1);
- for (int c = 0; c < 2; c++) {
- t_dst.planes[0] = temp->planes[1 + c];
- t_dst.stride[0] = temp->stride[1 + c];
- t_src.planes[0] = src->planes[1 + c];
- t_src.stride[0] = src->stride[1 + c];
- mp_image_swscale(&t_dst, &t_src, SWS_POINT);
- }
- temp->planes[0] = src->planes[0];
- temp->stride[0] = src->stride[0];
- } else {
- mp_image_swscale(temp, src, SWS_POINT);
+ p->overlay_to_f32 = mp_repack_create_planar(overlay_fmt, false, rflags);
+ talloc_steal(p, p->overlay_to_f32);
+ if (!p->overlay_to_f32)
+ return false;
+
+ int render_fmt = mp_repack_get_format_dst(p->overlay_to_f32);
+
+ struct mp_regular_imgfmt ofdesc = {0};
+ mp_get_regular_imgfmt(&ofdesc, render_fmt);
+
+ if (ofdesc.planes[ofdesc.num_planes - 1].components[0] != 4)
+ return false;
+
+ // The formats must be the same, minus possible lack of alpha in vfdesc.
+ if (ofdesc.num_planes != vfdesc.num_planes &&
+ ofdesc.num_planes - 1 != vfdesc.num_planes)
+ return false;
+ for (int n = 0; n < vfdesc.num_planes; n++) {
+ if (vfdesc.planes[n].components[0] != ofdesc.planes[n].components[0])
+ return false;
}
- return temp;
-}
+ p->align_x = mp_repack_get_align_x(p->video_to_f32);
+ p->align_y = mp_repack_get_align_y(p->video_to_f32);
-// Undo chroma_up() (copy temp to old_src if needed)
-static void chroma_down(struct mp_image *old_src, struct mp_image *temp)
-{
- assert(old_src->w == temp->w && old_src->h == temp->h);
- if (temp != old_src) {
- if (old_src->imgfmt == IMGFMT_420P) {