summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dr.Smile <vabnick@gmail.com> 2021-09-26 05:06:54 +0300
committer Dr.Smile <vabnick@gmail.com> 2021-09-26 17:13:07 +0300
commit 813237b76e3219294a87160407d51932e53d5721 (patch)
tree 093268e0dace26d43e8f68342b65bfb9bf8d0e7a
parent 4c3ace7cdcc740652331dbd57cc500b4ad50be81 (diff)
download libass-813237b76e3219294a87160407d51932e53d5721.tar.bz2
libass-813237b76e3219294a87160407d51932e53d5721.tar.xz
rasterizer: use max-blending for outline pair merge
Max-blending is more correct than the previous addition-blending for small (less than a pixel) outline offsets. It also fixes a buffer overrun (up to 16 bytes read past rst->tile) in add_bitmaps() when engine->tile_order < engine->align_order (AVX2 assembly with LARGE_TILES disabled), which was caused by insufficient padding.
-rw-r--r-- libass/ass_bitmap.h 2
-rw-r--r-- libass/ass_func_template.h 4
-rw-r--r-- libass/ass_rasterizer.c 3
-rw-r--r-- libass/ass_rasterizer_c.c 21
-rw-r--r-- libass/x86/rasterizer.asm 46
5 files changed, 74 insertions, 2 deletions
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index 60364cd..1a17fd5 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -34,6 +34,7 @@ typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride,
typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride,
const struct segment *line, size_t n_lines,
int winding);
+typedef void (*MergeTileFunc)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); // merges one tile into buf; buf rows are stride bytes apart
typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
@@ -67,6 +68,7 @@ typedef struct {
FillSolidTileFunc fill_solid;
FillHalfplaneTileFunc fill_halfplane;
FillGenericTileFunc fill_generic;
+ MergeTileFunc merge_tile;
// blend functions
BitmapBlendFunc add_bitmaps, imul_bitmaps;
diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h
index 3556a76..c1ea12a 100644
--- a/libass/ass_func_template.h
+++ b/libass/ass_func_template.h
@@ -30,6 +30,8 @@ void DECORATE(fill_generic_tile16)(uint8_t *buf, ptrdiff_t stride,
void DECORATE(fill_generic_tile32)(uint8_t *buf, ptrdiff_t stride,
const struct segment *line, size_t n_lines,
int winding);
+void DECORATE(merge_tile16)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); // tile merge for 16x16 tiles (installed together with tile_order 4)
+void DECORATE(merge_tile32)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); // tile merge for 32x32 tiles
void DECORATE(add_bitmaps)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
@@ -97,11 +99,13 @@ const BitmapEngine DECORATE(bitmap_engine) = {
.fill_solid = DECORATE(fill_solid_tile32),
.fill_halfplane = DECORATE(fill_halfplane_tile32),
.fill_generic = DECORATE(fill_generic_tile32),
+ .merge_tile = DECORATE(merge_tile32),
#else
.tile_order = 4,
.fill_solid = DECORATE(fill_solid_tile16),
.fill_halfplane = DECORATE(fill_halfplane_tile16),
.fill_generic = DECORATE(fill_generic_tile16),
+ .merge_tile = DECORATE(merge_tile16),
#endif
.add_bitmaps = DECORATE(add_bitmaps),
diff --git a/libass/ass_rasterizer.c b/libass/ass_rasterizer.c
index 1edec57..f55c757 100644
--- a/libass/ass_rasterizer.c
+++ b/libass/ass_rasterizer.c
@@ -674,8 +674,7 @@ static bool rasterizer_fill_level(const BitmapEngine *engine, RasterizerData *rs
else
engine->fill_halfplane(rst->tile, width, line1->a, line1->b, line1->c,
flags1 & FLAG_REVERSE ? -line1->scale : line1->scale);
- // XXX: better to use max instead of add
- engine->add_bitmaps(buf, stride, rst->tile, width, width, height);
+ engine->merge_tile(buf, stride, rst->tile);
rst->size[index] = offs;
return true;
}
diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c
index 6c81695..07459d0 100644
--- a/libass/ass_rasterizer_c.c
+++ b/libass/ass_rasterizer_c.c
@@ -377,3 +377,24 @@ void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride,
buf += stride;
}
}
+
+
+// Max-blend a 16x16 tile into buf: each byte becomes FFMAX(buf, tile).
+// Rows of buf are stride bytes apart; rows of tile are 16 bytes apart.
+void ass_merge_tile16_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile)
+{
+    for (int row = 0; row < 16; row++, buf += stride, tile += 16) {
+        for (int col = 0; col < 16; col++)
+            buf[col] = FFMAX(buf[col], tile[col]);
+    }
+}
+
+// Max-blend a 32x32 tile into buf: each byte becomes FFMAX(buf, tile).
+// Rows of buf are stride bytes apart; rows of tile are 32 bytes apart.
+void ass_merge_tile32_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile)
+{
+    for (int row = 0; row < 32; row++, buf += stride, tile += 32) {
+        for (int col = 0; col < 32; col++)
+            buf[col] = FFMAX(buf[col], tile[col]);
+    }
+}
diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm
index 909eb47..f9853e7 100644
--- a/libass/x86/rasterizer.asm
+++ b/libass/x86/rasterizer.asm
@@ -847,3 +847,49 @@ FILL_GENERIC_TILE 5,32
INIT_YMM avx2
FILL_GENERIC_TILE 4,16
FILL_GENERIC_TILE 5,32
+
+;------------------------------------------------------------------------------
+; MERGE_LINE 1:dst, 2:src, 3:m_tmp, 4:size
+;------------------------------------------------------------------------------
+
+%macro MERGE_LINE 4 ; merges one line: dst bytes become the unsigned max of dst and src, using m_tmp as scratch
+%if ((%4) & (mmsize - 1)) == 0 ; line size is a whole number of mmsize-byte vectors
+%assign %%i 0
+%rep (%4) / mmsize
+ mova m%3, [%1 + %%i] ; load one vector of dst bytes
+ pmaxub m%3, [%2 + %%i] ; per-byte unsigned max with the matching src bytes
+ mova [%1 + %%i], m%3 ; store the merged bytes back to dst
+%assign %%i %%i + mmsize
+%endrep
+%elif (%4) == 16 ; 16-byte line that is not a multiple of mmsize: one xm-register pass (presumably the ymm build; confirm)
+ mova xm%3, [%1]
+ pmaxub xm%3, [%2]
+ mova [%1], xm%3
+%else
+ %error "invalid line size"
+%endif
+%endmacro
+
+;------------------------------------------------------------------------------
+; MERGE_TILE 1:tile_order, 2:suffix
+; void merge_tile%2(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile);
+;------------------------------------------------------------------------------
+
+%macro MERGE_TILE 2 ; emits merge_tile<suffix>, merging (1 << tile_order) lines of (1 << tile_order) bytes each
+cglobal merge_tile%2, 3,3,1 ; NOTE(review): presumably r0 = buf, r1 = stride, r2 = tile per x86inc argument order; confirm
+%assign %%offs 0 ; assemble-time byte offset of the current line inside tile
+%rep (1 << %1) - 1 ; unrolled: every line except the last
+ MERGE_LINE r0, r2 + %%offs, 0, 1 << %1
+ add r0, r1 ; advance the destination pointer by one stride
+%assign %%offs %%offs + (1 << %1) ; tile lines are packed back to back
+%endrep
+ MERGE_LINE r0, r2 + %%offs, 0, 1 << %1 ; last line: no pointer advance follows
+ RET
+%endmacro
+
+INIT_XMM sse2 ; instantiate the sse2 variants
+MERGE_TILE 4,16 ; tile_order 4 -> merge_tile16
+MERGE_TILE 5,32 ; tile_order 5 -> merge_tile32
+INIT_YMM avx2 ; instantiate the avx2 variants
+MERGE_TILE 4,16 ; tile_order 4 -> merge_tile16
+MERGE_TILE 5,32 ; tile_order 5 -> merge_tile32