diff options
author | Dr.Smile <vabnick@gmail.com> | 2022-08-11 14:14:59 +0300 |
---|---|---|
committer | Dr.Smile <vabnick@gmail.com> | 2022-12-04 02:17:38 +0300 |
commit | 59f54fd94bc713594a8f4fa492f6b8380cde40aa (patch) | |
tree | 4c14ea1a69b25eebd3c3b2d251949645f43e3bde /libass | |
parent | d3563e80a88c83d9253ea044dd2676d0914e4822 (diff) | |
download | libass-59f54fd94bc713594a8f4fa492f6b8380cde40aa.tar.bz2 libass-59f54fd94bc713594a8f4fa492f6b8380cde40aa.tar.xz |
rasterizer: deduplicate tile functions
It is also now possible to switch between large and small tiles at runtime.
This will be needed for checkasm.
Diffstat (limited to 'libass')
-rw-r--r-- | libass/Makefile_library.am | 2 | ||||
-rw-r--r-- | libass/ass_bitmap_engine.c | 43 | ||||
-rw-r--r-- | libass/ass_bitmap_engine.h | 1 | ||||
-rw-r--r-- | libass/ass_render.c | 6 | ||||
-rw-r--r-- | libass/c/c_rasterizer.c | 381 | ||||
-rw-r--r-- | libass/c/rasterizer_template.h | 236 |
6 files changed, 267 insertions, 402 deletions
diff --git a/libass/Makefile_library.am b/libass/Makefile_library.am index 8810056..c13ac7b 100644 --- a/libass/Makefile_library.am +++ b/libass/Makefile_library.am @@ -28,7 +28,7 @@ libass_libass_la_SOURCES = \ libass/ass_rasterizer.h libass/ass_rasterizer.c \ libass/ass_render.h libass/ass_render.c libass/ass_render_api.c \ libass/ass_bitmap_engine.h libass/ass_bitmap_engine.c \ - libass/c/c_rasterizer.c \ + libass/c/rasterizer_template.h libass/c/c_rasterizer.c \ libass/c/c_blend_bitmaps.c \ libass/c/c_be_blur.c \ libass/c/c_blur.c \ diff --git a/libass/ass_bitmap_engine.c b/libass/ass_bitmap_engine.c index ef3f6b3..e87a688 100644 --- a/libass/ass_bitmap_engine.c +++ b/libass/ass_bitmap_engine.c @@ -50,8 +50,8 @@ ParamFilterFunc PARAM_BLUR_SET(horz_ ## suffix); \ ParamFilterFunc PARAM_BLUR_SET(vert_ ## suffix); -#define BITMAP_ENGINE(align_order_, tile_order_, tile_size, suffix) \ - const BitmapEngine ass_bitmap_engine_ ## suffix = { \ +#define BITMAP_ENGINE(align_order_, tile_order_, tile_size, suffix, be_suffix) \ + const BitmapEngine ass_bitmap_engine_ ## be_suffix = { \ .align_order = align_order_, \ .tile_order = tile_order_, \ .fill_solid = ass_fill_solid_tile ## tile_size ## _ ## suffix, \ @@ -73,34 +73,25 @@ }; -GENERIC_PROTOTYPES(c) -#if CONFIG_LARGE_TILES -RASTERIZER_PROTOTYPES(32, c) -BITMAP_ENGINE(C_ALIGN_ORDER, 5, 32, c) -#else RASTERIZER_PROTOTYPES(16, c) -BITMAP_ENGINE(C_ALIGN_ORDER, 4, 16, c) -#endif +RASTERIZER_PROTOTYPES(32, c) +GENERIC_PROTOTYPES(c) +BITMAP_ENGINE(C_ALIGN_ORDER, 4, 16, c, c) +BITMAP_ENGINE(C_ALIGN_ORDER, 5, 32, c, lt_c) #if CONFIG_ASM && ARCH_X86 -GENERIC_PROTOTYPES(sse2) -#if CONFIG_LARGE_TILES -RASTERIZER_PROTOTYPES(32, sse2) -BITMAP_ENGINE(4, 5, 32, sse2) -#else RASTERIZER_PROTOTYPES(16, sse2) -BITMAP_ENGINE(4, 4, 16, sse2) -#endif +RASTERIZER_PROTOTYPES(32, sse2) +GENERIC_PROTOTYPES(sse2) +BITMAP_ENGINE(4, 4, 16, sse2, sse2) +BITMAP_ENGINE(4, 5, 32, sse2, lt_sse2) -GENERIC_PROTOTYPES(avx2) -#if CONFIG_LARGE_TILES 
-RASTERIZER_PROTOTYPES(32, avx2) -BITMAP_ENGINE(5, 5, 32, avx2) -#else RASTERIZER_PROTOTYPES(16, avx2) -BITMAP_ENGINE(5, 4, 16, avx2) -#endif +RASTERIZER_PROTOTYPES(32, avx2) +GENERIC_PROTOTYPES(avx2) +BITMAP_ENGINE(5, 4, 16, avx2, avx2) +BITMAP_ENGINE(5, 5, 32, avx2, lt_avx2) #endif @@ -153,10 +144,10 @@ const BitmapEngine *ass_bitmap_engine_init(unsigned mask) unsigned flags = ass_get_cpu_flags(mask); #if ARCH_X86 if (flags & ASS_CPU_FLAG_X86_AVX2) - return &ass_bitmap_engine_avx2; + return mask & ASS_FLAG_LARGE_TILES ? &ass_bitmap_engine_lt_avx2 : &ass_bitmap_engine_avx2; if (flags & ASS_CPU_FLAG_X86_SSE2) - return &ass_bitmap_engine_sse2; + return mask & ASS_FLAG_LARGE_TILES ? &ass_bitmap_engine_lt_sse2 : &ass_bitmap_engine_sse2; #endif #endif - return &ass_bitmap_engine_c; + return mask & ASS_FLAG_LARGE_TILES ? &ass_bitmap_engine_lt_c : &ass_bitmap_engine_c; } diff --git a/libass/ass_bitmap_engine.h b/libass/ass_bitmap_engine.h index 3fbf6f2..4f223b0 100644 --- a/libass/ass_bitmap_engine.h +++ b/libass/ass_bitmap_engine.h @@ -87,6 +87,7 @@ enum { ASS_CPU_FLAG_X86_AVX2 = 0x0002, #endif ASS_CPU_FLAG_ALL = 0x0FFF, + ASS_FLAG_LARGE_TILES = 0x1000, }; unsigned ass_get_cpu_flags(unsigned mask); diff --git a/libass/ass_render.c b/libass/ass_render.c index 29a4374..64268c1 100644 --- a/libass/ass_render.c +++ b/libass/ass_render.c @@ -106,7 +106,11 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library) priv->ftlibrary = ft; // images_root and related stuff is zero-filled in calloc - priv->engine = ass_bitmap_engine_init(ASS_CPU_FLAG_ALL); + unsigned flags = ASS_CPU_FLAG_ALL; +#if CONFIG_LARGE_TILES + flags |= ASS_FLAG_LARGE_TILES; +#endif + priv->engine = ass_bitmap_engine_init(flags); if (!ass_rasterizer_init(priv->engine, &priv->state.rasterizer, RASTERIZER_PRECISION)) goto fail; diff --git a/libass/c/c_rasterizer.c b/libass/c/c_rasterizer.c index 1f9d8d8..fe7fa9f 100644 --- a/libass/c/c_rasterizer.c +++ b/libass/c/c_rasterizer.c @@ -19,382 +19,15 @@ #include 
"config.h" #include "ass_compat.h" -#include "ass_utils.h" -#include "ass_rasterizer.h" #include <assert.h> +#include "ass_rasterizer.h" -void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride, int set) -{ - uint8_t value = set ? 255 : 0; - for (int y = 0; y < 16; y++) { - for (int x = 0; x < 16; x++) - buf[x] = value; - buf += stride; - } -} - -void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride, int set) -{ - uint8_t value = set ? 255 : 0; - for (int y = 0; y < 32; y++) { - for (int x = 0; x < 32; x++) - buf[x] = value; - buf += stride; - } -} - - -/* - * Halfplane Filling Functions - * - * Fill pixels with antialiasing corresponding to equation - * A * x + B * y < C, where - * x, y - offset of pixel center from bottom-left, - * A = a * scale, B = b * scale, C = c * scale / 64. - * - * Normalization of coefficients prior call: - * max(abs(a), abs(b)) * scale = 1 << 61 - * - * Used Algorithm - * Let - * max_ab = max(abs(A), abs(B)), - * min_ab = min(abs(A), abs(B)), - * CC = C - A * x - B * y, then - * result = (clamp((CC - min_ab / 4) / max_ab) + - * clamp((CC + min_ab / 4) / max_ab) + - * 1) / 2, - * where clamp(Z) = max(-0.5, min(0.5, Z)). - */ - -void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride, - int32_t a, int32_t b, int64_t c, int32_t scale) -{ - int16_t aa = (a * (int64_t) scale + ((int64_t) 1 << 49)) >> 50; - int16_t bb = (b * (int64_t) scale + ((int64_t) 1 << 49)) >> 50; - int16_t cc = ((int32_t) (c >> 11) * (int64_t) scale + ((int64_t) 1 << 44)) >> 45; - cc += (1 << 9) - ((aa + bb) >> 1); - - int16_t abs_a = aa < 0 ? -aa : aa; - int16_t abs_b = bb < 0 ? 
-bb : bb; - int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2; - - int16_t va1[16], va2[16]; - for (int x = 0; x < 16; x++) { - va1[x] = aa * x - delta; - va2[x] = aa * x + delta; - } - - static const int16_t full = 1 << 10; - for (int y = 0; y < 16; y++) { - for (int x = 0; x < 16; x++) { - int16_t c1 = cc - va1[x]; - int16_t c2 = cc - va2[x]; - c1 = FFMINMAX(c1, 0, full); - c2 = FFMINMAX(c2, 0, full); - int16_t res = (c1 + c2) >> 3; - buf[x] = FFMIN(res, 255); - } - buf += stride; - cc -= bb; - } -} - -void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride, - int32_t a, int32_t b, int64_t c, int32_t scale) -{ - int16_t aa = (a * (int64_t) scale + ((int64_t) 1 << 50)) >> 51; - int16_t bb = (b * (int64_t) scale + ((int64_t) 1 << 50)) >> 51; - int16_t cc = ((int32_t) (c >> 12) * (int64_t) scale + ((int64_t) 1 << 44)) >> 45; - cc += (1 << 8) - ((aa + bb) >> 1); - - int16_t abs_a = aa < 0 ? -aa : aa; - int16_t abs_b = bb < 0 ? -bb : bb; - int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2; - - int16_t va1[32], va2[32]; - for (int x = 0; x < 32; x++) { - va1[x] = aa * x - delta; - va2[x] = aa * x + delta; - } - - static const int16_t full = 1 << 9; - for (int y = 0; y < 32; y++) { - for (int x = 0; x < 32; x++) { - int16_t c1 = cc - va1[x]; - int16_t c2 = cc - va2[x]; - c1 = FFMINMAX(c1, 0, full); - c2 = FFMINMAX(c2, 0, full); - int16_t res = (c1 + c2) >> 2; - buf[x] = FFMIN(res, 255); - } - buf += stride; - cc -= bb; - } -} - - -/* - * Generic Filling Functions - * - * Used Algorithm - * Construct trapeziod from each polyline segment and its projection into left side of tile. - * Render that trapeziod into internal buffer with additive blending and correct sign. - * Store clamped absolute value from internal buffer into result buffer. 
- */ - -// Render top/bottom line of the trapeziod with antialiasing -static inline void update_border_line16(int16_t res[16], - int16_t abs_a, const int16_t va[16], - int16_t b, int16_t abs_b, - int16_t c, int up, int dn) -{ - int16_t size = dn - up; - int16_t w = (1 << 10) + (size << 4) - abs_a; - w = FFMIN(w, 1 << 10) << 3; - - int16_t dc_b = abs_b * (int32_t) size >> 6; - int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2; - - int16_t base = (int32_t) b * (int16_t) (up + dn) >> 7; - int16_t offs1 = size - ((base + dc) * (int32_t) w >> 16); - int16_t offs2 = size - ((base - dc) * (int32_t) w >> 16); - - size <<= 1; - for (int x = 0; x < 16; x++) { - int16_t cw = (c - va[x]) * (int32_t) w >> 16; - int16_t c1 = cw + offs1; - int16_t c2 = cw + offs2; - c1 = FFMINMAX(c1, 0, size); - c2 = FFMINMAX(c2, 0, size); - res[x] += c1 + c2; - } -} - -void ass_fill_generic_tile16_c(uint8_t *buf, ptrdiff_t stride, - const struct segment *line, size_t n_lines, - int winding) -{ - int16_t res[16][16], delta[18]; - for (int y = 0; y < 16; y++) - for (int x = 0; x < 16; x++) - res[y][x] = 0; - for (int y = 0; y < 18; y++) - delta[y] = 0; - - static const int16_t full = 1 << 10; - const struct segment *end = line + n_lines; - for (; line != end; ++line) { - assert(line->y_min >= 0 && line->y_min < 1 << 10); - assert(line->y_max > 0 && line->y_max <= 1 << 10); - assert(line->y_min <= line->y_max); - - int16_t up_delta = line->flags & SEGFLAG_DN ? 
4 : 0; - int16_t dn_delta = up_delta; - if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) dn_delta ^= 4; - if (line->flags & SEGFLAG_UL_DR) { - int16_t tmp = up_delta; - up_delta = dn_delta; - dn_delta = tmp; - } - - int up = line->y_min >> 6, dn = line->y_max >> 6; - int16_t up_pos = line->y_min & 63; - int16_t up_delta1 = up_delta * up_pos; - int16_t dn_pos = line->y_max & 63; - int16_t dn_delta1 = dn_delta * dn_pos; - delta[up + 1] -= up_delta1; - delta[up] -= (up_delta << 6) - up_delta1; - delta[dn + 1] += dn_delta1; - delta[dn] += (dn_delta << 6) - dn_delta1; - if (line->y_min == line->y_max) - continue; - - int16_t a = (line->a * (int64_t) line->scale + ((int64_t) 1 << 49)) >> 50; - int16_t b = (line->b * (int64_t) line->scale + ((int64_t) 1 << 49)) >> 50; - int16_t c = ((int32_t) (line->c >> 11) * (int64_t) line->scale + ((int64_t) 1 << 44)) >> 45; - c -= (a >> 1) + b * up; - - int16_t va[16]; - for (int x = 0; x < 16; x++) - va[x] = a * x; - int16_t abs_a = a < 0 ? -a : a; - int16_t abs_b = b < 0 ? -b : b; - int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2; - int16_t base = (1 << 9) - (b >> 1); - int16_t dc1 = base + dc; - int16_t dc2 = base - dc; - - if (up_pos) { - if (dn == up) { - update_border_line16(res[up], abs_a, va, b, abs_b, c, up_pos, dn_pos); - continue; - } - update_border_line16(res[up], abs_a, va, b, abs_b, c, up_pos, 64); - up++; - c -= b; - } - for (int y = up; y < dn; y++) { - for (int x = 0; x < 16; x++) { - int16_t c1 = c - va[x] + dc1; - int16_t c2 = c - va[x] + dc2; - c1 = FFMINMAX(c1, 0, full); - c2 = FFMINMAX(c2, 0, full); - res[y][x] += (c1 + c2) >> 3; - } - c -= b; - } - if (dn_pos) - update_border_line16(res[dn], abs_a, va, b, abs_b, c, 0, dn_pos); - } - - int16_t cur = 256 * winding; - for (int y = 0; y < 16; y++) { - cur += delta[y]; - for (int x = 0; x < 16; x++) { - int16_t val = res[y][x] + cur, neg_val = -val; - val = (val > neg_val ? 
val : neg_val); - buf[x] = FFMIN(val, 255); - } - buf += stride; - } -} - -// Render top/bottom line of the trapeziod with antialiasing -static inline void update_border_line32(int16_t res[32], - int16_t abs_a, const int16_t va[32], - int16_t b, int16_t abs_b, - int16_t c, int up, int dn) -{ - int16_t size = dn - up; - int16_t w = (1 << 9) + (size << 3) - abs_a; - w = FFMIN(w, 1 << 9) << 5; - - int16_t dc_b = abs_b * (int32_t) size >> 6; - int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2; - - int16_t base = (int32_t) b * (int16_t) (up + dn) >> 7; - int16_t offs1 = size - ((base + dc) * (int32_t) w >> 16); - int16_t offs2 = size - ((base - dc) * (int32_t) w >> 16); - - size <<= 1; - for (int x = 0; x < 32; x++) { - int16_t cw = (c - va[x]) * (int32_t) w >> 16; - int16_t c1 = cw + offs1; - int16_t c2 = cw + offs2; - c1 = FFMINMAX(c1, 0, size); - c2 = FFMINMAX(c2, 0, size); - res[x] += c1 + c2; - } -} - -void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride, - const struct segment *line, size_t n_lines, - int winding) -{ - int16_t res[32][32], delta[34]; - for (int y = 0; y < 32; y++) - for (int x = 0; x < 32; x++) - res[y][x] = 0; - for (int y = 0; y < 34; y++) - delta[y] = 0; - - static const int16_t full = 1 << 9; - const struct segment *end = line + n_lines; - for (; line != end; ++line) { - assert(line->y_min >= 0 && line->y_min < 1 << 11); - assert(line->y_max > 0 && line->y_max <= 1 << 11); - assert(line->y_min <= line->y_max); - - int16_t up_delta = line->flags & SEGFLAG_DN ? 
4 : 0; - int16_t dn_delta = up_delta; - if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) dn_delta ^= 4; - if (line->flags & SEGFLAG_UL_DR) { - int16_t tmp = up_delta; - up_delta = dn_delta; - dn_delta = tmp; - } - - int up = line->y_min >> 6, dn = line->y_max >> 6; - int16_t up_pos = line->y_min & 63; - int16_t up_delta1 = up_delta * up_pos; - int16_t dn_pos = line->y_max & 63; - int16_t dn_delta1 = dn_delta * dn_pos; - delta[up + 1] -= up_delta1; - delta[up] -= (up_delta << 6) - up_delta1; - delta[dn + 1] += dn_delta1; - delta[dn] += (dn_delta << 6) - dn_delta1; - if (line->y_min == line->y_max) - continue; - - int16_t a = (line->a * (int64_t) line->scale + ((int64_t) 1 << 50)) >> 51; - int16_t b = (line->b * (int64_t) line->scale + ((int64_t) 1 << 50)) >> 51; - int16_t c = ((int32_t) (line->c >> 12) * (int64_t) line->scale + ((int64_t) 1 << 44)) >> 45; - c -= (a >> 1) + b * up; - - int16_t va[32]; - for (int x = 0; x < 32; x++) - va[x] = a * x; - int16_t abs_a = a < 0 ? -a : a; - int16_t abs_b = b < 0 ? -b : b; - int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2; - int16_t base = (1 << 8) - (b >> 1); - int16_t dc1 = base + dc; - int16_t dc2 = base - dc; - - if (up_pos) { - if (dn == up) { - update_border_line32(res[up], abs_a, va, b, abs_b, c, up_pos, dn_pos); - continue; - } - update_border_line32(res[up], abs_a, va, b, abs_b, c, up_pos, 64); - up++; - c -= b; - } - for (int y = up; y < dn; y++) { - for (int x = 0; x < 32; x++) { - int16_t c1 = c - va[x] + dc1; - int16_t c2 = c - va[x] + dc2; - c1 = FFMINMAX(c1, 0, full); - c2 = FFMINMAX(c2, 0, full); - res[y][x] += (c1 + c2) >> 2; - } - c -= b; - } - if (dn_pos) - update_border_line32(res[dn], abs_a, va, b, abs_b, c, 0, dn_pos); - } - - int16_t cur = 256 * winding; - for (int y = 0; y < 32; y++) { - cur += delta[y]; - for (int x = 0; x < 32; x++) { - int16_t val = res[y][x] + cur, neg_val = -val; - val = (val > neg_val ? 
val : neg_val); - buf[x] = FFMIN(val, 255); - } - buf += stride; - } -} - - -void ass_merge_tile16_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile) -{ - for (int y = 0; y < 16; y++) { - for (int x = 0; x < 16; x++) - buf[x] = FFMAX(buf[x], tile[x]); - buf += stride; - tile += 16; - } -} +#define TILE_SIZE 16 +#include "rasterizer_template.h" +#undef TILE_SIZE -void ass_merge_tile32_c(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile) -{ - for (int y = 0; y < 32; y++) { - for (int x = 0; x < 32; x++) - buf[x] = FFMAX(buf[x], tile[x]); - buf += stride; - tile += 32; - } -} +#define TILE_SIZE 32 +#include "rasterizer_template.h" +#undef TILE_SIZE diff --git a/libass/c/rasterizer_template.h b/libass/c/rasterizer_template.h new file mode 100644 index 0000000..2fe1569 --- /dev/null +++ b/libass/c/rasterizer_template.h @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014-2022 libass contributors + * + * This file is part of libass. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#if TILE_SIZE == 16 +#define SUFFIX(name) name ## 16_c +#define TILE_ORDER 4 +#elif TILE_SIZE == 32 +#define SUFFIX(name) name ## 32_c +#define TILE_ORDER 5 +#else +#error Unsupported tile size +#endif + +#define FULL_VALUE (1 << (14 - TILE_ORDER)) +#define RESCALE_AB(ab, scale) \ + (((ab) * (int64_t) (scale) + ((int64_t) 1 << (45 + TILE_ORDER))) >> (46 + TILE_ORDER)) +#define RESCALE_C(c, scale) \ + (((int32_t) ((c) >> (7 + TILE_ORDER)) * (int64_t) (scale) + ((int64_t) 1 << 44)) >> 45) + + +void SUFFIX(ass_fill_solid_tile)(uint8_t *buf, ptrdiff_t stride, int set) +{ + uint8_t value = set ? 255 : 0; + for (int y = 0; y < TILE_SIZE; y++) { + for (int x = 0; x < TILE_SIZE; x++) + buf[x] = value; + buf += stride; + } +} + + +/* + * Halfplane Filling Functions + * + * Fill pixels with antialiasing corresponding to equation + * A * x + B * y < C, where + * x, y - offset of pixel center from bottom-left, + * A = a * scale, B = b * scale, C = c * scale / 64. + * + * Normalization of coefficients prior call: + * max(abs(a), abs(b)) * scale = 1 << 61 + * + * Used Algorithm + * Let + * max_ab = max(abs(A), abs(B)), + * min_ab = min(abs(A), abs(B)), + * CC = C - A * x - B * y, then + * result = (clamp((CC - min_ab / 4) / max_ab) + + * clamp((CC + min_ab / 4) / max_ab) + + * 1) / 2, + * where clamp(Z) = max(-0.5, min(0.5, Z)). + */ + +void SUFFIX(ass_fill_halfplane_tile)(uint8_t *buf, ptrdiff_t stride, + int32_t a, int32_t b, int64_t c, int32_t scale) +{ + int16_t aa = RESCALE_AB(a, scale), bb = RESCALE_AB(b, scale); + int16_t cc = RESCALE_C(c, scale) + FULL_VALUE / 2 - ((aa + bb) >> 1); + + int16_t abs_a = aa < 0 ? -aa : aa; + int16_t abs_b = bb < 0 ? 
-bb : bb; + int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2; + + int16_t va1[TILE_SIZE], va2[TILE_SIZE]; + for (int x = 0; x < TILE_SIZE; x++) { + va1[x] = aa * x - delta; + va2[x] = aa * x + delta; + } + + for (int y = 0; y < TILE_SIZE; y++) { + for (int x = 0; x < TILE_SIZE; x++) { + int16_t c1 = cc - va1[x]; + int16_t c2 = cc - va2[x]; + c1 = FFMINMAX(c1, 0, FULL_VALUE); + c2 = FFMINMAX(c2, 0, FULL_VALUE); + int16_t res = (c1 + c2) >> (7 - TILE_ORDER); + buf[x] = FFMIN(res, 255); + } + buf += stride; + cc -= bb; + } +} + + +/* + * Generic Filling Functions + * + * Used Algorithm + * Construct trapeziod from each polyline segment and its projection into left side of tile. + * Render that trapeziod into internal buffer with additive blending and correct sign. + * Store clamped absolute value from internal buffer into result buffer. + */ + +// Render top/bottom line of the trapeziod with antialiasing +static inline void SUFFIX(update_border_line)(int16_t res[TILE_SIZE], + int16_t abs_a, const int16_t va[TILE_SIZE], + int16_t b, int16_t abs_b, + int16_t c, int up, int dn) +{ + int16_t size = dn - up; + int16_t w = FULL_VALUE + (size << (8 - TILE_ORDER)) - abs_a; + w = FFMIN(w, FULL_VALUE) << (2 * TILE_ORDER - 5); + + int16_t dc_b = abs_b * (int32_t) size >> 6; + int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2; + + int16_t base = (int32_t) b * (int16_t) (up + dn) >> 7; + int16_t offs1 = size - ((base + dc) * (int32_t) w >> 16); + int16_t offs2 = size - ((base - dc) * (int32_t) w >> 16); + + size <<= 1; + for (int x = 0; x < TILE_SIZE; x++) { + int16_t cw = (c - va[x]) * (int32_t) w >> 16; + int16_t c1 = cw + offs1; + int16_t c2 = cw + offs2; + c1 = FFMINMAX(c1, 0, size); + c2 = FFMINMAX(c2, 0, size); + res[x] += c1 + c2; + } +} + +void SUFFIX(ass_fill_generic_tile)(uint8_t *buf, ptrdiff_t stride, + const struct segment *line, size_t n_lines, + int winding) +{ + int16_t res[TILE_SIZE][TILE_SIZE] = {0}; + int16_t delta[TILE_SIZE + 2] = {0}; + + const struct segment *end = 
line + n_lines; + for (; line != end; ++line) { + assert(line->y_min >= 0 && line->y_min < 64 << TILE_ORDER); + assert(line->y_max > 0 && line->y_max <= 64 << TILE_ORDER); + assert(line->y_min <= line->y_max); + + int16_t up_delta = line->flags & SEGFLAG_DN ? 4 : 0; + int16_t dn_delta = up_delta; + if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) dn_delta ^= 4; + if (line->flags & SEGFLAG_UL_DR) { + int16_t tmp = up_delta; + up_delta = dn_delta; + dn_delta = tmp; + } + + int up = line->y_min >> 6, dn = line->y_max >> 6; + int16_t up_pos = line->y_min & 63; + int16_t up_delta1 = up_delta * up_pos; + int16_t dn_pos = line->y_max & 63; + int16_t dn_delta1 = dn_delta * dn_pos; + delta[up + 1] -= up_delta1; + delta[up] -= (up_delta << 6) - up_delta1; + delta[dn + 1] += dn_delta1; + delta[dn] += (dn_delta << 6) - dn_delta1; + if (line->y_min == line->y_max) + continue; + + int16_t a = RESCALE_AB(line->a, line->scale); + int16_t b = RESCALE_AB(line->b, line->scale); + int16_t c = RESCALE_C(line->c, line->scale) - (a >> 1) - b * up; + + int16_t va[TILE_SIZE]; + for (int x = 0; x < TILE_SIZE; x++) + va[x] = a * x; + int16_t abs_a = a < 0 ? -a : a; + int16_t abs_b = b < 0 ? 
-b : b; + int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2; + int16_t base = FULL_VALUE / 2 - (b >> 1); + int16_t dc1 = base + dc; + int16_t dc2 = base - dc; + + if (up_pos) { + if (dn == up) { + SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, dn_pos); + continue; + } + SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, 64); + up++; + c -= b; + } + for (int y = up; y < dn; y++) { + for (int x = 0; x < TILE_SIZE; x++) { + int16_t c1 = c - va[x] + dc1; + int16_t c2 = c - va[x] + dc2; + c1 = FFMINMAX(c1, 0, FULL_VALUE); + c2 = FFMINMAX(c2, 0, FULL_VALUE); + res[y][x] += (c1 + c2) >> (7 - TILE_ORDER); + } + c -= b; + } + if (dn_pos) + SUFFIX(update_border_line)(res[dn], abs_a, va, b, abs_b, c, 0, dn_pos); + } + + int16_t cur = 256 * (int8_t) winding; + for (int y = 0; y < TILE_SIZE; y++) { + cur += delta[y]; + for (int x = 0; x < TILE_SIZE; x++) { + int16_t val = res[y][x] + cur, neg_val = -val; + val = (val > neg_val ? val : neg_val); + buf[x] = FFMIN(val, 255); + } + buf += stride; + } +} + + +void SUFFIX(ass_merge_tile)(uint8_t *buf, ptrdiff_t stride, const uint8_t *tile) +{ + for (int y = 0; y < TILE_SIZE; y++) { + for (int x = 0; x < TILE_SIZE; x++) + buf[x] = FFMAX(buf[x], tile[x]); + buf += stride; + tile += TILE_SIZE; + } +} + + +#undef SUFFIX +#undef TILE_ORDER +#undef FULL_VALUE +#undef RESCALE_AB +#undef RESCALE_C |