diff options
-rw-r--r-- | libass/Makefile.am | 6 | ||||
-rw-r--r-- | libass/ass_bitmap.c | 126 | ||||
-rw-r--r-- | libass/ass_bitmap.h | 70 | ||||
-rw-r--r-- | libass/ass_func_template.h | 80 | ||||
-rw-r--r-- | libass/ass_rasterizer.c | 92 | ||||
-rw-r--r-- | libass/ass_rasterizer.h | 46 | ||||
-rw-r--r-- | libass/ass_render.c | 110 | ||||
-rw-r--r-- | libass/ass_render.h | 17 | ||||
-rw-r--r-- | libass/x86/be_blur.h | 30 | ||||
-rw-r--r-- | libass/x86/blend_bitmaps.h | 56 | ||||
-rw-r--r-- | libass/x86/rasterizer.h | 56 |
11 files changed, 297 insertions, 392 deletions
diff --git a/libass/Makefile.am b/libass/Makefile.am index 31dd9aa..13c533e 100644 --- a/libass/Makefile.am +++ b/libass/Makefile.am @@ -13,9 +13,9 @@ yasm_verbose_0 = @echo " YASM " $@; .asm.lo: $(yasm_verbose)$(LIBTOOL) $(AM_V_lt) --mode=compile $(AS) $(ASFLAGS) -o $@ $< -prefer-non-pic -SRC_INTEL = x86/blend_bitmaps.asm x86/cpuid.asm x86/blend_bitmaps.h x86/cpuid.h -SRC_INTEL64 = x86/be_blur.asm x86/be_blur.h -SRC_INTEL_RASTERIZER = x86/rasterizer.asm x86/rasterizer.h +SRC_INTEL = x86/blend_bitmaps.asm x86/cpuid.asm x86/cpuid.h +SRC_INTEL64 = x86/be_blur.asm +SRC_INTEL_RASTERIZER = x86/rasterizer.asm SRC_RASTERIZER = ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c index aa92d50..827c721 100644 --- a/libass/ass_bitmap.c +++ b/libass/ass_bitmap.c @@ -33,10 +33,30 @@ #include "ass_bitmap.h" #include "ass_render.h" + +#define ALIGN C_ALIGN_ORDER +#define DECORATE(func) ass_##func##_c +#include "ass_func_template.h" +#undef ALIGN +#undef DECORATE + #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM -#include "x86/be_blur.h" + +#define ALIGN 4 +#define DECORATE(func) ass_##func##_sse2 +#include "ass_func_template.h" +#undef ALIGN +#undef DECORATE + +#define ALIGN 5 +#define DECORATE(func) ass_##func##_avx2 +#include "ass_func_template.h" +#undef ALIGN +#undef DECORATE + #endif + static const unsigned base = 256; struct ass_synth_priv { @@ -51,8 +71,6 @@ struct ass_synth_priv { unsigned *gt2; double radius; - - BEBlurFunc be_blur_func; }; static bool generate_tables(ASS_SynthPriv *priv, double radius) @@ -137,7 +155,8 @@ static bool resize_tmp(ASS_SynthPriv *priv, int w, int h) return !!priv->tmp; } -void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be, +void ass_synth_blur(const BitmapEngine *engine, + ASS_SynthPriv *priv_blur, int opaque_box, int be, double blur_radius, Bitmap *bm_g, Bitmap *bm_o) { if(blur_radius > 0.0 || be){ @@ -175,20 +194,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be, be_blur_pre(buf, w, h, stride); while(--passes){ memset(tmp, 0, stride * 2); - if(w < 16){ - be_blur_c(buf, w, h, stride, tmp); - }else{ - priv_blur->be_blur_func(buf, w, h, stride, tmp); - } + engine->be_blur(buf, w, h, stride, tmp); } be_blur_post(buf, w, h, stride); } memset(tmp, 0, stride * 2); - if(w < 16){ - be_blur_c(buf, w, h, stride, tmp); - }else{ - priv_blur->be_blur_func(buf, w, h, stride, tmp); - } + engine->be_blur(buf, w, h, stride, tmp); } } if (!bm_o || opaque_box) { @@ -202,12 +213,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be, be_blur_pre(buf, w, h, stride); while(--passes){ memset(tmp, 0, stride * 2); - priv_blur->be_blur_func(buf, w, h, stride, tmp); + engine->be_blur(buf, w, h, stride, tmp); } be_blur_post(buf, w, h, stride); } memset(tmp, 0, stride * 2); - priv_blur->be_blur_func(buf, w, h, stride, tmp); + engine->be_blur(buf, w, h, stride, tmp); } } } @@ -220,16 +231,6 @@ ASS_SynthPriv *ass_synth_init(double radius) free(priv); return NULL; } - #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM - int avx2 = has_avx2(); - #ifdef __x86_64__ - priv->be_blur_func = avx2 ? ass_be_blur_avx2 : ass_be_blur_sse2; - #else - priv->be_blur_func = be_blur_c; - #endif - #else - priv->be_blur_func = be_blur_c; - #endif return priv; } @@ -242,36 +243,42 @@ void ass_synth_done(ASS_SynthPriv *priv) free(priv); } -static Bitmap *alloc_bitmap_raw(int w, int h) +static bool alloc_bitmap_buffer(const BitmapEngine *engine, Bitmap *bm, int w, int h) { - Bitmap *bm; - - unsigned align = (w >= 32) ? 32 : ((w >= 16) ? 16 : 1); + unsigned align = 1 << engine->align_order; size_t s = ass_align(align, w); // Too often we use ints as offset for bitmaps => use INT_MAX. if (s > (INT_MAX - 32) / FFMAX(h, 1)) - return NULL; - bm = malloc(sizeof(Bitmap)); + return false; + uint8_t *buf = ass_aligned_alloc(align, s * h + 32); + if (!buf) + return false; + bm->w = w; + bm->h = h; + bm->stride = s; + bm->buffer = buf; + return true; +} + +static Bitmap *alloc_bitmap_raw(const BitmapEngine *engine, int w, int h) +{ + Bitmap *bm = malloc(sizeof(Bitmap)); if (!bm) return NULL; - bm->buffer = ass_aligned_alloc(align, s * h + 32); - if (!bm->buffer) { + if (!alloc_bitmap_buffer(engine, bm, w, h)) { free(bm); return NULL; } - bm->w = w; - bm->h = h; - bm->stride = s; - bm->left = bm->top = 0; return bm; } -Bitmap *alloc_bitmap(int w, int h) +Bitmap *alloc_bitmap(const BitmapEngine *engine, int w, int h) { - Bitmap *bm = alloc_bitmap_raw(w, h); + Bitmap *bm = alloc_bitmap_raw(engine, w, h); if(!bm) return NULL; memset(bm->buffer, 0, bm->stride * bm->h + 32); + bm->left = bm->top = 0; return bm; } @@ -282,9 +289,9 @@ void ass_free_bitmap(Bitmap *bm) free(bm); } -Bitmap *copy_bitmap(const Bitmap *src) +Bitmap *copy_bitmap(const BitmapEngine *engine, const Bitmap *src) { - Bitmap *dst = alloc_bitmap_raw(src->w, src->h); + Bitmap *dst = alloc_bitmap_raw(engine, src->w, src->h); if (!dst) return NULL; dst->left = src->left; @@ -298,7 +305,7 @@ Bitmap *copy_bitmap(const Bitmap *src) Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, ASS_Outline *outline, int bord) { - ASS_Rasterizer *rst = &render_priv->rasterizer; + RasterizerData *rst = &render_priv->rasterizer; if (!rasterizer_set_outline(rst, outline)) { ass_msg(render_priv->library, MSGL_WARN, "Failed to process glyph outline!\n"); return NULL; @@ -308,7 +315,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, return NULL; if (rst->x_min >= rst->x_max || rst->y_min >= rst->y_max) { - Bitmap *bm = alloc_bitmap(2 * bord, 2 * bord); + Bitmap *bm = alloc_bitmap(render_priv->engine, 2 * bord, 2 * bord); if (!bm) return NULL; bm->left = bm->top = -bord; @@ -325,7 +332,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, int w = x_max - x_min; int h = y_max - y_min; - int mask = (1 << rst->tile_order) - 1; + int mask = (1 << render_priv->engine->tile_order) - 1; if (w < 0 || h < 0 || w > 8000000 / FFMAX(h, 1) || w > INT_MAX - (2 * bord + mask) || h > INT_MAX - (2 * bord + mask)) { @@ -336,13 +343,13 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, int tile_w = (w + 2 * bord + mask) & ~mask; int tile_h = (h + 2 * bord + mask) & ~mask; - Bitmap *bm = alloc_bitmap_raw(tile_w, tile_h); + Bitmap *bm = alloc_bitmap_raw(render_priv->engine, tile_w, tile_h); if (!bm) return NULL; bm->left = x_min - bord; bm->top = y_min - bord; - if (!rasterizer_fill(rst, bm->buffer, + if (!rasterizer_fill(render_priv->engine, rst, bm->buffer, x_min - bord, y_min - bord, bm->stride, tile_h, bm->stride)) { ass_msg(render_priv->library, MSGL_WARN, "Failed to rasterize glyph!\n"); @@ -651,9 +658,8 @@ void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2, * This blur is the same as the one employed by vsfilter. * Pure C implementation. */ -void be_blur_c(uint8_t *buf, intptr_t w, - intptr_t h, intptr_t stride, - uint16_t *tmp) +void ass_be_blur_c(uint8_t *buf, intptr_t w, intptr_t h, + intptr_t stride, uint16_t *tmp) { uint16_t *col_pix_buf = tmp; uint16_t *col_sum_buf = tmp + w; @@ -800,9 +806,9 @@ int outline_to_bitmap2(ASS_Renderer *render_priv, * \brief Add two bitmaps together at a given position * Uses additive blending, clipped to [0,255]. Pure C implementation. */ -void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src, intptr_t src_stride, - intptr_t height, intptr_t width) +void ass_add_bitmaps_c(uint8_t *dst, intptr_t dst_stride, + uint8_t *src, intptr_t src_stride, + intptr_t height, intptr_t width) { unsigned out; uint8_t* end = dst + dst_stride * height; @@ -816,9 +822,9 @@ void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride, } } -void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src, intptr_t src_stride, - intptr_t height, intptr_t width) +void ass_sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride, + uint8_t *src, intptr_t src_stride, + intptr_t height, intptr_t width) { short out; uint8_t* end = dst + dst_stride * height; @@ -832,10 +838,10 @@ void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride, } } -void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src1, intptr_t src1_stride, - uint8_t *src2, intptr_t src2_stride, - intptr_t w, intptr_t h) +void ass_mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride, + uint8_t *src1, intptr_t src1_stride, + uint8_t *src2, intptr_t src2_stride, + intptr_t w, intptr_t h) { uint8_t* end = src1 + src1_stride * h; while (src1 < end) { diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h index 901adc7..7817d33 100644 --- a/libass/ass_bitmap.h +++ b/libass/ass_bitmap.h @@ -19,6 +19,7 @@ #ifndef LIBASS_BITMAP_H #define LIBASS_BITMAP_H +#include <stdbool.h> #include <ft2build.h> #include FT_GLYPH_H @@ -29,6 +30,51 @@ typedef struct ass_synth_priv ASS_SynthPriv; ASS_SynthPriv *ass_synth_init(double); void ass_synth_done(ASS_SynthPriv *priv); +struct segment; +typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride, int set); +typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride, + int32_t a, int32_t b, int64_t c, int32_t scale); +typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride, + const struct segment *line, size_t n_lines, + int winding); + +typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride, + uint8_t *src, intptr_t src_stride, + intptr_t height, intptr_t width); +typedef void (*BitmapMulFunc)(uint8_t *dst, intptr_t dst_stride, + uint8_t *src1, intptr_t src1_stride, + uint8_t *src2, intptr_t src2_stride, + intptr_t width, intptr_t height); + +typedef void (*BeBlurFunc)(uint8_t *buf, intptr_t w, intptr_t h, + intptr_t stride, uint16_t *tmp); + +#define C_ALIGN_ORDER 5 + +typedef struct { + int align_order; // log2(alignment) + + // rasterizer functions +#if CONFIG_RASTERIZER + int tile_order; // log2(tile_size) + FillSolidTileFunc fill_solid; + FillHalfplaneTileFunc fill_halfplane; + FillGenericTileFunc fill_generic; +#endif + + // blend functions + BitmapBlendFunc add_bitmaps, sub_bitmaps; + BitmapMulFunc mul_bitmaps; + + // be blur function + BeBlurFunc be_blur; +} BitmapEngine; + +extern const BitmapEngine ass_bitmap_engine_c; +extern const BitmapEngine ass_bitmap_engine_sse2; +extern const BitmapEngine ass_bitmap_engine_avx2; + + typedef struct { size_t n_contours, max_contours; size_t *contours; @@ -46,12 +92,15 @@ typedef struct { unsigned char *buffer; // h * stride buffer } Bitmap; +Bitmap *alloc_bitmap(const BitmapEngine *engine, int w, int h); +Bitmap *copy_bitmap(const BitmapEngine *engine, const Bitmap *src); +void ass_free_bitmap(Bitmap *bm); + Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, ASS_Outline *outline, int bord); -Bitmap *alloc_bitmap(int w, int h); - -void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be, +void ass_synth_blur(const BitmapEngine *engine, + ASS_SynthPriv *priv_blur, int opaque_box, int be, double blur_radius, Bitmap *bm_g, Bitmap *bm_o); /** @@ -65,30 +114,15 @@ int outline_to_bitmap2(ASS_Renderer *render_priv, ASS_Outline *outline, ASS_Outline *border, Bitmap **bm_g, Bitmap **bm_o); -void ass_free_bitmap(Bitmap *bm); void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2, int width, int height, int stride, unsigned *m2, int r, int mwidth); int be_padding(int be); -void be_blur_c(uint8_t *buf, intptr_t w, - intptr_t h, intptr_t stride, - uint16_t *tmp); void be_blur_pre(uint8_t *buf, intptr_t w, intptr_t h, intptr_t stride); void be_blur_post(uint8_t *buf, intptr_t w, intptr_t h, intptr_t stride); -void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src, intptr_t src_stride, - intptr_t height, intptr_t width); -void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src, intptr_t src_stride, - intptr_t height, intptr_t width); -void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride, - uint8_t *src1, intptr_t src1_stride, - uint8_t *src2, intptr_t src2_stride, - intptr_t w, intptr_t h); void shift_bitmap(Bitmap *bm, int shift_x, int shift_y); void fix_outline(Bitmap *bm_g, Bitmap *bm_o); -Bitmap *copy_bitmap(const Bitmap *src); #endif /* LIBASS_BITMAP_H */ diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h new file mode 100644 index 0000000..6ffc730 --- /dev/null +++ b/libass/ass_func_template.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2015 Vabishchevich Nikolay <vabnick@gmail.com> + * + * This file is part of libass. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + + + +void DECORATE(fill_solid_tile16)(uint8_t *buf, ptrdiff_t stride, int set); +void DECORATE(fill_solid_tile32)(uint8_t *buf, ptrdiff_t stride, int set); +void DECORATE(fill_halfplane_tile16)(uint8_t *buf, ptrdiff_t stride, + int32_t a, int32_t b, int64_t c, int32_t scale); +void DECORATE(fill_halfplane_tile32)(uint8_t *buf, ptrdiff_t stride, + int32_t a, int32_t b, int64_t c, int32_t scale); +void DECORATE(fill_generic_tile16)(uint8_t *buf, ptrdiff_t stride, + const struct segment *line, size_t n_lines, + int winding); +void DECORATE(fill_generic_tile32)(uint8_t *buf, ptrdiff_t stride, + const struct segment *line, size_t n_lines, + int winding); + +void DECORATE(add_bitmaps)(uint8_t *dst, intptr_t dst_stride, + uint8_t *src, intptr_t src_stride, + intptr_t height, intptr_t width); +void DECORATE(sub_bitmaps)(uint8_t *dst, intptr_t dst_stride, + uint8_t *src, intptr_t src_stride, + intptr_t height, intptr_t width); +void DECORATE(mul_bitmaps)(uint8_t *dst, intptr_t dst_stride, + uint8_t *src1, intptr_t src1_stride, + uint8_t *src2, intptr_t src2_stride, + intptr_t width, intptr_t height); + +void DECORATE(be_blur)(uint8_t *buf, intptr_t w, intptr_t h, + intptr_t stride, uint16_t *tmp); + + +const BitmapEngine DECORATE(bitmap_engine) = { + .align_order = ALIGN, + +#if CONFIG_RASTERIZER +#if CONFIG_LARGE_TILES + .tile_order = 5, + .fill_solid = DECORATE(fill_solid_tile32), + .fill_halfplane = DECORATE(fill_halfplane_tile32), + .fill_generic = DECORATE(fill_generic_tile32), +#else + .tile_order = 4, + .fill_solid = DECORATE(fill_solid_tile16), + .fill_halfplane = DECORATE(fill_halfplane_tile16), + .fill_generic = DECORATE(fill_generic_tile16), +#endif +#endif + + .add_bitmaps = DECORATE(add_bitmaps), +#ifdef __x86_64__ + .sub_bitmaps = DECORATE(sub_bitmaps), + .mul_bitmaps = DECORATE(mul_bitmaps), +#else + .sub_bitmaps = ass_sub_bitmaps_c, + .mul_bitmaps = ass_mul_bitmaps_c, +#endif + +#ifdef __x86_64__ + .be_blur = DECORATE(be_blur), +#else + .be_blur = ass_be_blur_c, +#endif +}; diff --git a/libass/ass_rasterizer.c b/libass/ass_rasterizer.c index 05463de..8a0ee3b 100644 --- a/libass/ass_rasterizer.c +++ b/libass/ass_rasterizer.c @@ -47,8 +47,9 @@ static inline int ilog2(uint32_t n) // XXX: different compilers } -void rasterizer_init(ASS_Rasterizer *rst) +void rasterizer_init(RasterizerData *rst, int outline_error) { + rst->outline_error = outline_error; rst->linebuf[0] = rst->linebuf[1] = NULL; rst->size[0] = rst->capacity[0] = 0; rst->size[1] = rst->capacity[1] = 0; @@ -60,7 +61,7 @@ void rasterizer_init(ASS_Rasterizer *rst) * \param delta requested size increase * \return zero on error */ -static inline int check_capacity(ASS_Rasterizer *rst, int index, size_t delta) +static inline int check_capacity(RasterizerData *rst, int index, size_t delta) { delta += rst->size[index]; if (rst->capacity[index] >= delta) @@ -78,7 +79,7 @@ static inline int check_capacity(ASS_Rasterizer *rst, int index, size_t delta) return 1; } -void rasterizer_done(ASS_Rasterizer *rst) +void rasterizer_done(RasterizerData *rst) { free(rst->linebuf[0]); free(rst->linebuf[1]); @@ -145,7 +146,7 @@ static inline int segment_subdivide(const OutlineSegment *seg, /** * \brief Add new segment to polyline */ -static inline int add_line(ASS_Rasterizer *rst, OutlinePoint pt0, OutlinePoint pt1) +static inline int add_line(RasterizerData *rst, OutlinePoint pt0, OutlinePoint pt1) { int32_t x = pt1.x - pt0.x; int32_t y = pt1.y - pt0.y; @@ -192,7 +193,7 @@ static inline int add_line(ASS_Rasterizer *rst, OutlinePoint pt0, OutlinePoint p * \brief Add quadratic spline to polyline * Performs recursive subdivision if necessary. */ -static int add_quadratic(ASS_Rasterizer *rst, +static int add_quadratic(RasterizerData *rst, OutlinePoint pt0, OutlinePoint pt1, OutlinePoint pt2) { OutlineSegment seg; @@ -218,7 +219,7 @@ static int add_quadratic(ASS_Rasterizer *rst, * \brief Add cubic spline to polyline * Performs recursive subdivision if necessary. */ -static int add_cubic(ASS_Rasterizer *rst, +static int add_cubic(RasterizerData *rst, OutlinePoint pt0, OutlinePoint pt1, OutlinePoint pt2, OutlinePoint pt3) { OutlineSegment seg; @@ -251,7 +252,7 @@ static int add_cubic(ASS_Rasterizer *rst, } -int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path) +int rasterizer_set_outline(RasterizerData *rst, const ASS_Outline *path) { enum Status { S_ON, S_Q, S_C1, S_C2 @@ -612,56 +613,56 @@ static int polyline_split_vert(const struct segment *src, size_t n_src, } -static inline void rasterizer_fill_solid(ASS_Rasterizer *rst, +static inline void rasterizer_fill_solid(const BitmapEngine *engine, uint8_t *buf, int width, int height, ptrdiff_t stride, int set) { - assert(!(width & ((1 << rst->tile_order) - 1))); - assert(!(height & ((1 << rst->tile_order) - 1))); + assert(!(width & ((1 << engine->tile_order) - 1))); + assert(!(height & ((1 << engine->tile_order) - 1))); int i, j; - ptrdiff_t step = 1 << rst->tile_order; - ptrdiff_t tile_stride = stride * (1 << rst->tile_order); - width >>= rst->tile_order; - height >>= rst->tile_order; + ptrdiff_t step = 1 << engine->tile_order; + ptrdiff_t tile_stride = stride * (1 << engine->tile_order); + width >>= engine->tile_order; + height >>= engine->tile_order; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) - rst->fill_solid(buf + i * step, stride, set); + engine->fill_solid(buf + i * step, stride, set); buf += tile_stride; } } -static inline void rasterizer_fill_halfplane(ASS_Rasterizer *rst, +static inline void rasterizer_fill_halfplane(const BitmapEngine *engine, uint8_t *buf, int width, int height, ptrdiff_t stride, int32_t a, int32_t b, int64_t c, int32_t scale) { - assert(!(width & ((1 << rst->tile_order) - 1))); - assert(!(height & ((1 << rst->tile_order) - 1))); - if (width == 1 << rst->tile_order && height == 1 << rst->tile_order) { - rst->fill_halfplane(buf, stride, a, b, c, scale); + assert(!(width & ((1 << engine->tile_order) - 1))); + assert(!(height & ((1 << engine->tile_order) - 1))); + if (width == 1 << engine->tile_order && height == 1 << engine->tile_order) { + engine->fill_halfplane(buf, stride, a, b, c, scale); return; } uint32_t abs_a = a < 0 ? -a : a; uint32_t abs_b = b < 0 ? -b : b; - int64_t size = (int64_t)(abs_a + abs_b) << (rst->tile_order + 5); - int64_t offs = ((int64_t)a + b) * (1 << (rst->tile_order + 5)); + int64_t size = (int64_t)(abs_a + abs_b) << (engine->tile_order + 5); + int64_t offs = ((int64_t)a + b) * (1 << (engine->tile_order + 5)); int i, j; - ptrdiff_t step = 1 << rst->tile_order; - ptrdiff_t tile_stride = stride * (1 << rst->tile_order); - width >>= rst->tile_order; - height >>= rst->tile_order; + ptrdiff_t step = 1 << engine->tile_order; + ptrdiff_t tile_stride = stride * (1 << engine->tile_order); + width >>= engine->tile_order; + height >>= engine->tile_order; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) { - int64_t cc = c - (a * (int64_t)i + b * (int64_t)j) * (1 << (rst->tile_order + 6)); + int64_t cc = c - (a * (int64_t)i + b * (int64_t)j) * (1 << (engine->tile_order + 6)); int64_t offs_c = offs - cc; int64_t abs_c = offs_c < 0 ? -offs_c : offs_c; if (abs_c < size) - rst->fill_halfplane(buf + i * step, stride, a, b, cc, scale); + engine->fill_halfplane(buf + i * step, stride, a, b, cc, scale); else - rst->fill_solid(buf + i * step, stride, - ((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000); + engine->fill_solid(buf + i * step, stride, + ((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000); } buf += tile_stride; } @@ -676,18 +677,19 @@ static inline void rasterizer_fill_halfplane(ASS_Rasterizer *rst, * Rasterizes (possibly recursive) one quad-tree level. * Truncates used input buffer. */ -static int rasterizer_fill_level(ASS_Rasterizer *rst, - uint8_t *buf, int width, int height, ptrdiff_t stride, int index, size_t offs, int winding) +static int rasterizer_fill_level(const BitmapEngine *engine, RasterizerData *rst, + uint8_t *buf, int width, int height, ptrdiff_t stride, + int index, size_t offs, int winding) { assert(width > 0 && height > 0); assert((unsigned)index < 2u && offs <= rst->size[index]); - assert(!(width & ((1 << rst->tile_order) - 1))); - assert(!(height & ((1 << rst->tile_order) - 1))); + assert(!(width & ((1 << engine->tile_order) - 1))); + assert(!(height & ((1 << engine->tile_order) - 1))); size_t n = rst->size[index] - offs; struct segment *line = rst->linebuf[index] + offs; if (!n) { - rasterizer_fill_solid(rst, buf, width, height, stride, winding); + rasterizer_fill_solid(engine, buf, width, height, stride, winding); return 1; } if (n == 1) { @@ -701,16 +703,16 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst, if (winding - 1) flag ^= 3; if (flag & 1) - rasterizer_fill_halfplane(rst, buf, width, height, stride, + rasterizer_fill_halfplane(engine, buf, width, height, stride, line->a, line->b, line->c, flag & 2 ? -line->scale : line->scale); else - rasterizer_fill_solid(rst, buf, width, height, stride, flag & 2); + rasterizer_fill_solid(engine, buf, width, height, stride, flag & 2); rst->size[index] = offs; return 1; } - if (width == 1 << rst->tile_order && height == 1 << rst->tile_order) { - rst->fill_generic(buf, stride, line, rst->size[index] - offs, winding); + if (width == 1 << engine->tile_order && height == 1 << engine->tile_order) { + engine->fill_generic(buf, stride, line, rst->size[index] - offs, winding); rst->size[index] = offs; return 1; } @@ -739,21 +741,21 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst, rst->size[index ^ 0] = dst0 - rst->linebuf[index ^ 0]; rst->size[index ^ 1] = dst1 - rst->linebuf[index ^ 1]; - if (!rasterizer_fill_level(rst, buf, width, height, stride, index ^ 0, offs, winding)) + if (!rasterizer_fill_level(engine, rst, buf, width, height, stride, index ^ 0, offs, winding)) return 0; assert(rst->size[index ^ 0] == offs); - if (!rasterizer_fill_level(rst, buf1, width1, height1, stride, index ^ 1, offs1, winding1)) + if (!rasterizer_fill_level(engine, rst, buf1, width1, height1, stride, index ^ 1, offs1, winding1)) return 0; assert(rst->size[index ^ 1] == offs1); return 1; } -int rasterizer_fill(ASS_Rasterizer *rst, +int rasterizer_fill(const BitmapEngine *engine, RasterizerData *rst, uint8_t *buf, int x0, int y0, int width, int height, ptrdiff_t stride) { assert(width > 0 && height > 0); - assert(!(width & ((1 << rst->tile_order) - 1))); - assert(!(height & ((1 << rst->tile_order) - 1))); + assert(!(width & ((1 << engine->tile_order) - 1))); + assert(!(height & ((1 << engine->tile_order) - 1))); x0 *= 1 << 6; y0 *= 1 << 6; size_t n = rst->size[0]; @@ -805,6 +807,6 @@ int rasterizer_fill(ASS_Rasterizer *rst, } rst->size[index] = n; rst->size[index ^ 1] = 0; - return rasterizer_fill_level(rst, buf, width, height, stride, + return rasterizer_fill_level(engine, rst, buf, width, height, stride, index, 0, winding); } diff --git a/libass/ass_rasterizer.h b/libass/ass_rasterizer.h index d20feb3..73cdba4 100644 --- a/libass/ass_rasterizer.h +++ b/libass/ass_rasterizer.h @@ -22,7 +22,7 @@ #include <stddef.h> #include <stdint.h> -#include "ass.h" +#include "ass_bitmap.h" #include "ass_font.h" @@ -42,48 +42,25 @@ struct segment { int32_t x_min, x_max, y_min, y_max; }; - -typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride, int set); -typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride, - int32_t a, int32_t b, int64_t c, int32_t scale); -typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride, - const struct segment *line, size_t n_lines, - int winding); - -void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride, int set); -void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride, int set); -void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride, - int32_t a, int32_t b, int64_t c, int32_t scale); -void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride, - int32_t a, int32_t b, int64_t c, int32_t scale); -void ass_fill_generic_tile16_c(uint8_t *buf, ptrdiff_t stride, - const struct segment *line, size_t n_lines, - int winding); -void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride, - const struct segment *line, size_t n_lines, - int winding); - -typedef struct ass_rasterizer { +typedef struct { int outline_error; // acceptable error (in 1/64 pixel units) - int tile_order; // log2(tile_size) - FillSolidTileFunc fill_solid; - FillHalfplaneTileFunc fill_halfplane; - FillGenericTileFunc fill_generic; - - int32_t x_min, x_max, y_min, y_max; // usable after rasterizer_set_outline + // usable after rasterizer_set_outline + int32_t x_min, x_max, y_min, y_max; // internal buffers struct segment *linebuf[2]; size_t size[2], capacity[2]; -} ASS_Rasterizer; +} RasterizerData; + +void rasterizer_init(RasterizerData *rst, int outline_error); +void rasterizer_done(RasterizerData *rst); -void rasterizer_init(ASS_Rasterizer *rst); -void rasterizer_done(ASS_Rasterizer *rst); /** * \brief Convert FreeType outline to polyline and calculate exact bounds */ -int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path); +int rasterizer_set_outline(RasterizerData *rst, const ASS_Outline *path); + /** * \brief Polyline rasterization function * \param x0, y0, width, height in: source window (full pixel units) @@ -92,7 +69,8 @@ int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path); * \return zero on error * Deletes preprocessed polyline after work. */ -int rasterizer_fill(ASS_Rasterizer *rst, uint8_t *buf, int x0, int y0, +int rasterizer_fill(const BitmapEngine *engine, RasterizerData *rst, + uint8_t *buf, int x0, int y0, int width, int height, ptrdiff_t stride); diff --git a/libass/ass_render.c b/libass/ass_render.c index 357e1cc..0995b96 100644 --- a/libass/ass_render.c +++ b/libass/ass_render.c @@ -34,12 +34,6 @@ #define SUBPIXEL_MASK 63 #define SUBPIXEL_ACCURACY 7 -#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM - -#include "x86/blend_bitmaps.h" -#include "x86/rasterizer.h" - -#endif // ASM ASS_Renderer *ass_renderer_init(ASS_Library *library) { @@ -70,56 +64,19 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library) priv->ftlibrary = ft; // images_root and related stuff is zero-filled in calloc - #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM - int sse2 = has_sse2(); - int avx2 = has_avx2(); - priv->add_bitmaps_func = avx2 ? ass_add_bitmaps_avx2 : - (sse2 ? ass_add_bitmaps_sse2 : ass_add_bitmaps_x86); - #ifdef __x86_64__ - priv->mul_bitmaps_func = avx2 ? ass_mul_bitmaps_avx2 : ass_mul_bitmaps_sse2; - priv->sub_bitmaps_func = avx2 ? ass_sub_bitmaps_avx2 : ass_sub_bitmaps_sse2; - #else - priv->mul_bitmaps_func = mul_bitmaps_c; - priv->sub_bitmaps_func = ass_sub_bitmaps_x86; - #endif - #else - priv->add_bitmaps_func = add_bitmaps_c; - priv->sub_bitmaps_func = sub_bitmaps_c; - priv->mul_bitmaps_func = mul_bitmaps_c; - #endif - -#if CONFIG_RASTERIZER -#if CONFIG_LARGE_TILES - priv->rasterizer.tile_order = 5; - #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM - priv->rasterizer.fill_solid = avx2 ? ass_fill_solid_tile32_avx2 : - (sse2 ? ass_fill_solid_tile32_sse2 : ass_fill_solid_tile32_c); - priv->rasterizer.fill_halfplane = avx2 ? ass_fill_halfplane_tile32_avx2 : - (sse2 ? ass_fill_halfplane_tile32_sse2 : ass_fill_halfplane_tile32_c); - priv->rasterizer.fill_generic = avx2 ? ass_fill_generic_tile32_avx2 : - (sse2 ? ass_fill_generic_tile32_sse2 : ass_fill_generic_tile32_c); - #else - priv->rasterizer.fill_solid = ass_fill_solid_tile32_c; - priv->rasterizer.fill_halfplane = ass_fill_halfplane_tile32_c; - priv->rasterizer.fill_generic = ass_fill_generic_tile32_c; - #endif +#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM + if (has_avx2()) + priv->engine = &ass_bitmap_engine_avx2; + else if (has_sse2()) + priv->engine = &ass_bitmap_engine_sse2; + else + priv->engine = &ass_bitmap_engine_c; #else - priv->rasterizer.tile_order = 4; - #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM - priv->rasterizer.fill_solid = avx2 ? ass_fill_solid_tile16_avx2 : - (sse2 ? ass_fill_solid_tile16_sse2 : ass_fill_solid_tile16_c); - priv->rasterizer.fill_halfplane = avx2 ? ass_fill_halfplane_tile16_avx2 : - (sse2 ? ass_fill_halfplane_tile16_sse2 : ass_fill_halfplane_tile16_c); - priv->rasterizer.fill_generic = avx2 ? ass_fill_generic_tile16_avx2 : - (sse2 ? ass_fill_generic_tile16_sse2 : ass_fill_generic_tile16_c); - #else - priv->rasterizer.fill_solid = ass_fill_solid_tile16_c; - priv->rasterizer.fill_halfplane = ass_fill_halfplane_tile16_c; - priv->rasterizer.fill_generic = ass_fill_generic_tile16_c; - #endif + priv->engine = &ass_bitmap_engine_c; #endif - priv->rasterizer.outline_error = 16; - rasterizer_init(&priv->rasterizer); + |