diff options
author | Dr.Smile <vabnick@gmail.com> | 2021-09-26 05:06:54 +0300 |
---|---|---|
committer | Dr.Smile <vabnick@gmail.com> | 2021-09-26 17:13:07 +0300 |
commit | 813237b76e3219294a87160407d51932e53d5721 (patch) | |
tree | 093268e0dace26d43e8f68342b65bfb9bf8d0e7a | |
parent | 4c3ace7cdcc740652331dbd57cc500b4ad50be81 (diff) | |
download | libass-813237b76e3219294a87160407d51932e53d5721.tar.bz2 libass-813237b76e3219294a87160407d51932e53d5721.tar.xz |
rasterizer: use max-blending for outline pair merge
Max-blending is more correct than the previous addition-blending
in the case of small (sub-pixel) outline offsets.
Fixes a buffer overrun (up to 16 bytes read past rst->tile)
in add_bitmaps() when engine->tile_order < engine->align_order
(AVX2 assembly with LARGE_TILES disabled) due to insufficient padding.
-rw-r--r-- | libass/ass_bitmap.h | 2 | ||||
-rw-r--r-- | libass/ass_func_template.h | 4 | ||||
-rw-r--r-- | libass/ass_rasterizer.c | 3 | ||||
-rw-r--r-- | libass/ass_rasterizer_c.c | 21 | ||||
-rw-r--r-- | libass/x86/rasterizer.asm | 46 |
5 files changed, 74 insertions, 2 deletions
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h index 60364cd..1a17fd5 100644 --- a/libass/ass_bitmap.h +++ b/libass/ass_bitmap.h @@ -34,6 +34,7 @@ typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride, typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride, const struct segment *line, size_t n_lines, int winding); +typedef void (*MergeTileFunc)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, @@ -67,6 +68,7 @@ typedef struct { FillSolidTileFunc fill_solid; FillHalfplaneTileFunc fill_halfplane; FillGenericTileFunc fill_generic; + MergeTileFunc merge_tile; // blend functions BitmapBlendFunc add_bitmaps, imul_bitmaps; diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h index 3556a76..c1ea12a 100644 --- a/libass/ass_func_template.h +++ b/libass/ass_func_template.h @@ -30,6 +30,8 @@ void DECORATE(fill_generic_tile16)(uint8_t *buf, ptrdiff_t stride, void DECORATE(fill_generic_tile32)(uint8_t *buf, ptrdiff_t stride, const struct segment *line, size_t n_lines, int winding); +void DECORATE(merge_tile16)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); +void DECORATE(merge_tile32)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); void DECORATE(add_bitmaps)(uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, @@ -97,11 +99,13 @@ const BitmapEngine DECORATE(bitmap_engine) = { .fill_solid = DECORATE(fill_solid_tile32), .fill_halfplane = DECORATE(fill_halfplane_tile32), .fill_generic = DECORATE(fill_generic_tile32), + .merge_tile = DECORATE(merge_tile32), #else .tile_order = 4, .fill_solid = DECORATE(fill_solid_tile16), .fill_halfplane = DECORATE(fill_halfplane_tile16), .fill_generic = DECORATE(fill_generic_tile16), + .merge_tile = DECORATE(merge_tile16), #endif .add_bitmaps = DECORATE(add_bitmaps), diff --git a/libass/ass_rasterizer.c b/libass/ass_rasterizer.c index 1edec57..f55c757 100644 --- 
a/libass/ass_rasterizer.c +++ b/libass/ass_rasterizer.c @@ -674,8 +674,7 @@ static bool rasterizer_fill_level(const BitmapEngine *engine, RasterizerData *rs else engine->fill_halfplane(rst->tile, width, line1->a, line1->b, line1->c, flags1 & FLAG_REVERSE ? -line1->scale : line1->scale); - // XXX: better to use max instead of add - engine->add_bitmaps(buf, stride, rst->tile, width, width, height); + engine->merge_tile(buf, stride, rst->tile); rst->size[index] = offs; return true; } diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c index 6c81695..07459d0 100644 --- a/libass/ass_rasterizer_c.c +++ b/libass/ass_rasterizer_c.c @@ -377,3 +377,24 @@ void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride, buf += stride; } } + + +void ass_merge_tile16_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile) +{ + for (int y = 0; y < 16; y++) { + for (int x = 0; x < 16; x++) + buf[x] = FFMAX(buf[x], tile[x]); + buf += stride; + tile += 16; + } +} + +void ass_merge_tile32_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile) +{ + for (int y = 0; y < 32; y++) { + for (int x = 0; x < 32; x++) + buf[x] = FFMAX(buf[x], tile[x]); + buf += stride; + tile += 32; + } +} diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm index 909eb47..f9853e7 100644 --- a/libass/x86/rasterizer.asm +++ b/libass/x86/rasterizer.asm @@ -847,3 +847,49 @@ FILL_GENERIC_TILE 5,32 INIT_YMM avx2 FILL_GENERIC_TILE 4,16 FILL_GENERIC_TILE 5,32 + +;------------------------------------------------------------------------------ +; MERGE_LINE 1:dst, 2:src, 3:m_tmp, 4:size +;------------------------------------------------------------------------------ + +%macro MERGE_LINE 4 +%if ((%4) & (mmsize - 1)) == 0 +%assign %%i 0 +%rep (%4) / mmsize + mova m%3, [%1 + %%i] + pmaxub m%3, [%2 + %%i] + mova [%1 + %%i], m%3 +%assign %%i %%i + mmsize +%endrep +%elif (%4) == 16 + mova xm%3, [%1] + pmaxub xm%3, [%2] + mova [%1], xm%3 +%else + %error "invalid line size" +%endif +%endmacro + 
+;------------------------------------------------------------------------------ +; MERGE_TILE 1:tile_order, 2:suffix +; void merge_tile%2(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile); +;------------------------------------------------------------------------------ + +%macro MERGE_TILE 2 +cglobal merge_tile%2, 3,3,1 +%assign %%offs 0 +%rep (1 << %1) - 1 + MERGE_LINE r0, r2 + %%offs, 0, 1 << %1 + add r0, r1 +%assign %%offs %%offs + (1 << %1) +%endrep + MERGE_LINE r0, r2 + %%offs, 0, 1 << %1 + RET +%endmacro + +INIT_XMM sse2 +MERGE_TILE 4,16 +MERGE_TILE 5,32 +INIT_YMM avx2 +MERGE_TILE 4,16 +MERGE_TILE 5,32 |