summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDr.Smile <vabnick@gmail.com>2015-06-26 02:14:21 +0300
committerDr.Smile <vabnick@gmail.com>2015-06-26 02:14:21 +0300
commit38a175cad5a894467cb7187ad0187db1ae3f89ff (patch)
treea7262be6620f31c2f0d8df26218c6160c5354f56
parent88f1fd37fe5785851f069e3dc6939f1ab905f4bc (diff)
downloadlibass-38a175cad5a894467cb7187ad0187db1ae3f89ff.tar.bz2
libass-38a175cad5a894467cb7187ad0187db1ae3f89ff.tar.xz
Switch to virtual function table
Use a single pointer to a table of functions instead of a scattered bunch of function pointers. Different versions of these tables can be constructed at compile time. Also, bitmap memory alignment now depends only on SSE2/AVX2 support and is constant for every width. That simplifies the code without a noticeable performance penalty.
-rw-r--r--libass/Makefile.am6
-rw-r--r--libass/ass_bitmap.c126
-rw-r--r--libass/ass_bitmap.h70
-rw-r--r--libass/ass_func_template.h80
-rw-r--r--libass/ass_rasterizer.c92
-rw-r--r--libass/ass_rasterizer.h46
-rw-r--r--libass/ass_render.c110
-rw-r--r--libass/ass_render.h17
-rw-r--r--libass/x86/be_blur.h30
-rw-r--r--libass/x86/blend_bitmaps.h56
-rw-r--r--libass/x86/rasterizer.h56
11 files changed, 297 insertions, 392 deletions
diff --git a/libass/Makefile.am b/libass/Makefile.am
index 31dd9aa..13c533e 100644
--- a/libass/Makefile.am
+++ b/libass/Makefile.am
@@ -13,9 +13,9 @@ yasm_verbose_0 = @echo " YASM " $@;
.asm.lo:
$(yasm_verbose)$(LIBTOOL) $(AM_V_lt) --mode=compile $(AS) $(ASFLAGS) -o $@ $< -prefer-non-pic
-SRC_INTEL = x86/blend_bitmaps.asm x86/cpuid.asm x86/blend_bitmaps.h x86/cpuid.h
-SRC_INTEL64 = x86/be_blur.asm x86/be_blur.h
-SRC_INTEL_RASTERIZER = x86/rasterizer.asm x86/rasterizer.h
+SRC_INTEL = x86/blend_bitmaps.asm x86/cpuid.asm x86/cpuid.h
+SRC_INTEL64 = x86/be_blur.asm
+SRC_INTEL_RASTERIZER = x86/rasterizer.asm
SRC_RASTERIZER = ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index aa92d50..827c721 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -33,10 +33,30 @@
#include "ass_bitmap.h"
#include "ass_render.h"
+
+#define ALIGN C_ALIGN_ORDER
+#define DECORATE(func) ass_##func##_c
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
-#include "x86/be_blur.h"
+
+#define ALIGN 4
+#define DECORATE(func) ass_##func##_sse2
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
+#define ALIGN 5
+#define DECORATE(func) ass_##func##_avx2
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
#endif
+
static const unsigned base = 256;
struct ass_synth_priv {
@@ -51,8 +71,6 @@ struct ass_synth_priv {
unsigned *gt2;
double radius;
-
- BEBlurFunc be_blur_func;
};
static bool generate_tables(ASS_SynthPriv *priv, double radius)
@@ -137,7 +155,8 @@ static bool resize_tmp(ASS_SynthPriv *priv, int w, int h)
return !!priv->tmp;
}
-void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
+void ass_synth_blur(const BitmapEngine *engine,
+ ASS_SynthPriv *priv_blur, int opaque_box, int be,
double blur_radius, Bitmap *bm_g, Bitmap *bm_o)
{
if(blur_radius > 0.0 || be){
@@ -175,20 +194,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
be_blur_pre(buf, w, h, stride);
while(--passes){
memset(tmp, 0, stride * 2);
- if(w < 16){
- be_blur_c(buf, w, h, stride, tmp);
- }else{
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
- }
+ engine->be_blur(buf, w, h, stride, tmp);
}
be_blur_post(buf, w, h, stride);
}
memset(tmp, 0, stride * 2);
- if(w < 16){
- be_blur_c(buf, w, h, stride, tmp);
- }else{
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
- }
+ engine->be_blur(buf, w, h, stride, tmp);
}
}
if (!bm_o || opaque_box) {
@@ -202,12 +213,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
be_blur_pre(buf, w, h, stride);
while(--passes){
memset(tmp, 0, stride * 2);
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
+ engine->be_blur(buf, w, h, stride, tmp);
}
be_blur_post(buf, w, h, stride);
}
memset(tmp, 0, stride * 2);
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
+ engine->be_blur(buf, w, h, stride, tmp);
}
}
}
@@ -220,16 +231,6 @@ ASS_SynthPriv *ass_synth_init(double radius)
free(priv);
return NULL;
}
- #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
- int avx2 = has_avx2();
- #ifdef __x86_64__
- priv->be_blur_func = avx2 ? ass_be_blur_avx2 : ass_be_blur_sse2;
- #else
- priv->be_blur_func = be_blur_c;
- #endif
- #else
- priv->be_blur_func = be_blur_c;
- #endif
return priv;
}
@@ -242,36 +243,42 @@ void ass_synth_done(ASS_SynthPriv *priv)
free(priv);
}
-static Bitmap *alloc_bitmap_raw(int w, int h)
+static bool alloc_bitmap_buffer(const BitmapEngine *engine, Bitmap *bm, int w, int h)
{
- Bitmap *bm;
-
- unsigned align = (w >= 32) ? 32 : ((w >= 16) ? 16 : 1);
+ unsigned align = 1 << engine->align_order;
size_t s = ass_align(align, w);
// Too often we use ints as offset for bitmaps => use INT_MAX.
if (s > (INT_MAX - 32) / FFMAX(h, 1))
- return NULL;
- bm = malloc(sizeof(Bitmap));
+ return false;
+ uint8_t *buf = ass_aligned_alloc(align, s * h + 32);
+ if (!buf)
+ return false;
+ bm->w = w;
+ bm->h = h;
+ bm->stride = s;
+ bm->buffer = buf;
+ return true;
+}
+
+static Bitmap *alloc_bitmap_raw(const BitmapEngine *engine, int w, int h)
+{
+ Bitmap *bm = malloc(sizeof(Bitmap));
if (!bm)
return NULL;
- bm->buffer = ass_aligned_alloc(align, s * h + 32);
- if (!bm->buffer) {
+ if (!alloc_bitmap_buffer(engine, bm, w, h)) {
free(bm);
return NULL;
}
- bm->w = w;
- bm->h = h;
- bm->stride = s;
- bm->left = bm->top = 0;
return bm;
}
-Bitmap *alloc_bitmap(int w, int h)
+Bitmap *alloc_bitmap(const BitmapEngine *engine, int w, int h)
{
- Bitmap *bm = alloc_bitmap_raw(w, h);
+ Bitmap *bm = alloc_bitmap_raw(engine, w, h);
if(!bm)
return NULL;
memset(bm->buffer, 0, bm->stride * bm->h + 32);
+ bm->left = bm->top = 0;
return bm;
}
@@ -282,9 +289,9 @@ void ass_free_bitmap(Bitmap *bm)
free(bm);
}
-Bitmap *copy_bitmap(const Bitmap *src)
+Bitmap *copy_bitmap(const BitmapEngine *engine, const Bitmap *src)
{
- Bitmap *dst = alloc_bitmap_raw(src->w, src->h);
+ Bitmap *dst = alloc_bitmap_raw(engine, src->w, src->h);
if (!dst)
return NULL;
dst->left = src->left;
@@ -298,7 +305,7 @@ Bitmap *copy_bitmap(const Bitmap *src)
Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
ASS_Outline *outline, int bord)
{
- ASS_Rasterizer *rst = &render_priv->rasterizer;
+ RasterizerData *rst = &render_priv->rasterizer;
if (!rasterizer_set_outline(rst, outline)) {
ass_msg(render_priv->library, MSGL_WARN, "Failed to process glyph outline!\n");
return NULL;
@@ -308,7 +315,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
return NULL;
if (rst->x_min >= rst->x_max || rst->y_min >= rst->y_max) {
- Bitmap *bm = alloc_bitmap(2 * bord, 2 * bord);
+ Bitmap *bm = alloc_bitmap(render_priv->engine, 2 * bord, 2 * bord);
if (!bm)
return NULL;
bm->left = bm->top = -bord;
@@ -325,7 +332,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
int w = x_max - x_min;
int h = y_max - y_min;
- int mask = (1 << rst->tile_order) - 1;
+ int mask = (1 << render_priv->engine->tile_order) - 1;
if (w < 0 || h < 0 || w > 8000000 / FFMAX(h, 1) ||
w > INT_MAX - (2 * bord + mask) || h > INT_MAX - (2 * bord + mask)) {
@@ -336,13 +343,13 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
int tile_w = (w + 2 * bord + mask) & ~mask;
int tile_h = (h + 2 * bord + mask) & ~mask;
- Bitmap *bm = alloc_bitmap_raw(tile_w, tile_h);
+ Bitmap *bm = alloc_bitmap_raw(render_priv->engine, tile_w, tile_h);
if (!bm)
return NULL;
bm->left = x_min - bord;
bm->top = y_min - bord;
- if (!rasterizer_fill(rst, bm->buffer,
+ if (!rasterizer_fill(render_priv->engine, rst, bm->buffer,
x_min - bord, y_min - bord,
bm->stride, tile_h, bm->stride)) {
ass_msg(render_priv->library, MSGL_WARN, "Failed to rasterize glyph!\n");
@@ -651,9 +658,8 @@ void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2,
* This blur is the same as the one employed by vsfilter.
* Pure C implementation.
*/
-void be_blur_c(uint8_t *buf, intptr_t w,
- intptr_t h, intptr_t stride,
- uint16_t *tmp)
+void ass_be_blur_c(uint8_t *buf, intptr_t w, intptr_t h,
+ intptr_t stride, uint16_t *tmp)
{
uint16_t *col_pix_buf = tmp;
uint16_t *col_sum_buf = tmp + w;
@@ -800,9 +806,9 @@ int outline_to_bitmap2(ASS_Renderer *render_priv,
* \brief Add two bitmaps together at a given position
* Uses additive blending, clipped to [0,255]. Pure C implementation.
*/
-void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+void ass_add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width)
{
unsigned out;
uint8_t* end = dst + dst_stride * height;
@@ -816,9 +822,9 @@ void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
}
}
-void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+void ass_sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width)
{
short out;
uint8_t* end = dst + dst_stride * height;
@@ -832,10 +838,10 @@ void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
}
}
-void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src1, intptr_t src1_stride,
- uint8_t *src2, intptr_t src2_stride,
- intptr_t w, intptr_t h)
+void ass_mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src1, intptr_t src1_stride,
+ uint8_t *src2, intptr_t src2_stride,
+ intptr_t w, intptr_t h)
{
uint8_t* end = src1 + src1_stride * h;
while (src1 < end) {
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index 901adc7..7817d33 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -19,6 +19,7 @@
#ifndef LIBASS_BITMAP_H
#define LIBASS_BITMAP_H
+#include <stdbool.h>
#include <ft2build.h>
#include FT_GLYPH_H
@@ -29,6 +30,51 @@ typedef struct ass_synth_priv ASS_SynthPriv;
ASS_SynthPriv *ass_synth_init(double);
void ass_synth_done(ASS_SynthPriv *priv);
+struct segment;
+typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride, int set);
+typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride,
+ int32_t a, int32_t b, int64_t c, int32_t scale);
+typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride,
+ const struct segment *line, size_t n_lines,
+ int winding);
+
+typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width);
+typedef void (*BitmapMulFunc)(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src1, intptr_t src1_stride,
+ uint8_t *src2, intptr_t src2_stride,
+ intptr_t width, intptr_t height);
+
+typedef void (*BeBlurFunc)(uint8_t *buf, intptr_t w, intptr_t h,
+ intptr_t stride, uint16_t *tmp);
+
+#define C_ALIGN_ORDER 5
+
+typedef struct {
+ int align_order; // log2(alignment)
+
+ // rasterizer functions
+#if CONFIG_RASTERIZER
+ int tile_order; // log2(tile_size)
+ FillSolidTileFunc fill_solid;
+ FillHalfplaneTileFunc fill_halfplane;
+ FillGenericTileFunc fill_generic;
+#endif
+
+ // blend functions
+ BitmapBlendFunc add_bitmaps, sub_bitmaps;
+ BitmapMulFunc mul_bitmaps;
+
+ // be blur function
+ BeBlurFunc be_blur;
+} BitmapEngine;
+
+extern const BitmapEngine ass_bitmap_engine_c;
+extern const BitmapEngine ass_bitmap_engine_sse2;
+extern const BitmapEngine ass_bitmap_engine_avx2;
+
+
typedef struct {
size_t n_contours, max_contours;
size_t *contours;
@@ -46,12 +92,15 @@ typedef struct {
unsigned char *buffer; // h * stride buffer
} Bitmap;
+Bitmap *alloc_bitmap(const BitmapEngine *engine, int w, int h);
+Bitmap *copy_bitmap(const BitmapEngine *engine, const Bitmap *src);
+void ass_free_bitmap(Bitmap *bm);
+
Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
ASS_Outline *outline, int bord);
-Bitmap *alloc_bitmap(int w, int h);
-
-void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
+void ass_synth_blur(const BitmapEngine *engine,
+ ASS_SynthPriv *priv_blur, int opaque_box, int be,
double blur_radius, Bitmap *bm_g, Bitmap *bm_o);
/**
@@ -65,30 +114,15 @@ int outline_to_bitmap2(ASS_Renderer *render_priv,
ASS_Outline *outline, ASS_Outline *border,
Bitmap **bm_g, Bitmap **bm_o);
-void ass_free_bitmap(Bitmap *bm);
void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2,
int width, int height, int stride,
unsigned *m2, int r, int mwidth);
int be_padding(int be);
-void be_blur_c(uint8_t *buf, intptr_t w,
- intptr_t h, intptr_t stride,
- uint16_t *tmp);
void be_blur_pre(uint8_t *buf, intptr_t w,
intptr_t h, intptr_t stride);
void be_blur_post(uint8_t *buf, intptr_t w,
intptr_t h, intptr_t stride);
-void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width);
-void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width);
-void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src1, intptr_t src1_stride,
- uint8_t *src2, intptr_t src2_stride,
- intptr_t w, intptr_t h);
void shift_bitmap(Bitmap *bm, int shift_x, int shift_y);
void fix_outline(Bitmap *bm_g, Bitmap *bm_o);
-Bitmap *copy_bitmap(const Bitmap *src);
#endif /* LIBASS_BITMAP_H */
diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h
new file mode 100644
index 0000000..6ffc730
--- /dev/null
+++ b/libass/ass_func_template.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2015 Vabishchevich Nikolay <vabnick@gmail.com>
+ *
+ * This file is part of libass.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+
+void DECORATE(fill_solid_tile16)(uint8_t *buf, ptrdiff_t stride, int set);
+void DECORATE(fill_solid_tile32)(uint8_t *buf, ptrdiff_t stride, int set);
+void DECORATE(fill_halfplane_tile16)(uint8_t *buf, ptrdiff_t stride,
+ int32_t a, int32_t b, int64_t c, int32_t scale);
+void DECORATE(fill_halfplane_tile32)(uint8_t *buf, ptrdiff_t stride,
+ int32_t a, int32_t b, int64_t c, int32_t scale);
+void DECORATE(fill_generic_tile16)(uint8_t *buf, ptrdiff_t stride,
+ const struct segment *line, size_t n_lines,
+ int winding);
+void DECORATE(fill_generic_tile32)(uint8_t *buf, ptrdiff_t stride,
+ const struct segment *line, size_t n_lines,
+ int winding);
+
+void DECORATE(add_bitmaps)(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width);
+void DECORATE(sub_bitmaps)(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width);
+void DECORATE(mul_bitmaps)(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src1, intptr_t src1_stride,
+ uint8_t *src2, intptr_t src2_stride,
+ intptr_t width, intptr_t height);
+
+void DECORATE(be_blur)(uint8_t *buf, intptr_t w, intptr_t h,
+ intptr_t stride, uint16_t *tmp);
+
+
+const BitmapEngine DECORATE(bitmap_engine) = {
+ .align_order = ALIGN,
+
+#if CONFIG_RASTERIZER
+#if CONFIG_LARGE_TILES
+ .tile_order = 5,
+ .fill_solid = DECORATE(fill_solid_tile32),
+ .fill_halfplane = DECORATE(fill_halfplane_tile32),
+ .fill_generic = DECORATE(fill_generic_tile32),
+#else
+ .tile_order = 4,
+ .fill_solid = DECORATE(fill_solid_tile16),
+ .fill_halfplane = DECORATE(fill_halfplane_tile16),
+ .fill_generic = DECORATE(fill_generic_tile16),
+#endif
+#endif
+
+ .add_bitmaps = DECORATE(add_bitmaps),
+#ifdef __x86_64__
+ .sub_bitmaps = DECORATE(sub_bitmaps),
+ .mul_bitmaps = DECORATE(mul_bitmaps),
+#else
+ .sub_bitmaps = ass_sub_bitmaps_c,
+ .mul_bitmaps = ass_mul_bitmaps_c,
+#endif
+
+#ifdef __x86_64__
+ .be_blur = DECORATE(be_blur),
+#else
+ .be_blur = ass_be_blur_c,
+#endif
+};
diff --git a/libass/ass_rasterizer.c b/libass/ass_rasterizer.c
index 05463de..8a0ee3b 100644
--- a/libass/ass_rasterizer.c
+++ b/libass/ass_rasterizer.c
@@ -47,8 +47,9 @@ static inline int ilog2(uint32_t n) // XXX: different compilers
}
-void rasterizer_init(ASS_Rasterizer *rst)
+void rasterizer_init(RasterizerData *rst, int outline_error)
{
+ rst->outline_error = outline_error;
rst->linebuf[0] = rst->linebuf[1] = NULL;
rst->size[0] = rst->capacity[0] = 0;
rst->size[1] = rst->capacity[1] = 0;
@@ -60,7 +61,7 @@ void rasterizer_init(ASS_Rasterizer *rst)
* \param delta requested size increase
* \return zero on error
*/
-static inline int check_capacity(ASS_Rasterizer *rst, int index, size_t delta)
+static inline int check_capacity(RasterizerData *rst, int index, size_t delta)
{
delta += rst->size[index];
if (rst->capacity[index] >= delta)
@@ -78,7 +79,7 @@ static inline int check_capacity(ASS_Rasterizer *rst, int index, size_t delta)
return 1;
}
-void rasterizer_done(ASS_Rasterizer *rst)
+void rasterizer_done(RasterizerData *rst)
{
free(rst->linebuf[0]);
free(rst->linebuf[1]);
@@ -145,7 +146,7 @@ static inline int segment_subdivide(const OutlineSegment *seg,
/**
* \brief Add new segment to polyline
*/
-static inline int add_line(ASS_Rasterizer *rst, OutlinePoint pt0, OutlinePoint pt1)
+static inline int add_line(RasterizerData *rst, OutlinePoint pt0, OutlinePoint pt1)
{
int32_t x = pt1.x - pt0.x;
int32_t y = pt1.y - pt0.y;
@@ -192,7 +193,7 @@ static inline int add_line(ASS_Rasterizer *rst, OutlinePoint pt0, OutlinePoint p
* \brief Add quadratic spline to polyline
* Performs recursive subdivision if necessary.
*/
-static int add_quadratic(ASS_Rasterizer *rst,
+static int add_quadratic(RasterizerData *rst,
OutlinePoint pt0, OutlinePoint pt1, OutlinePoint pt2)
{
OutlineSegment seg;
@@ -218,7 +219,7 @@ static int add_quadratic(ASS_Rasterizer *rst,
* \brief Add cubic spline to polyline
* Performs recursive subdivision if necessary.
*/
-static int add_cubic(ASS_Rasterizer *rst,
+static int add_cubic(RasterizerData *rst,
OutlinePoint pt0, OutlinePoint pt1, OutlinePoint pt2, OutlinePoint pt3)
{
OutlineSegment seg;
@@ -251,7 +252,7 @@ static int add_cubic(ASS_Rasterizer *rst,
}
-int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path)
+int rasterizer_set_outline(RasterizerData *rst, const ASS_Outline *path)
{
enum Status {
S_ON, S_Q, S_C1, S_C2
@@ -612,56 +613,56 @@ static int polyline_split_vert(const struct segment *src, size_t n_src,
}
-static inline void rasterizer_fill_solid(ASS_Rasterizer *rst,
+static inline void rasterizer_fill_solid(const BitmapEngine *engine,
uint8_t *buf, int width, int height, ptrdiff_t stride,
int set)
{
- assert(!(width & ((1 << rst->tile_order) - 1)));
- assert(!(height & ((1 << rst->tile_order) - 1)));
+ assert(!(width & ((1 << engine->tile_order) - 1)));
+ assert(!(height & ((1 << engine->tile_order) - 1)));
int i, j;
- ptrdiff_t step = 1 << rst->tile_order;
- ptrdiff_t tile_stride = stride * (1 << rst->tile_order);
- width >>= rst->tile_order;
- height >>= rst->tile_order;
+ ptrdiff_t step = 1 << engine->tile_order;
+ ptrdiff_t tile_stride = stride * (1 << engine->tile_order);
+ width >>= engine->tile_order;
+ height >>= engine->tile_order;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i)
- rst->fill_solid(buf + i * step, stride, set);
+ engine->fill_solid(buf + i * step, stride, set);
buf += tile_stride;
}
}
-static inline void rasterizer_fill_halfplane(ASS_Rasterizer *rst,
+static inline void rasterizer_fill_halfplane(const BitmapEngine *engine,
uint8_t *buf, int width, int height, ptrdiff_t stride,
int32_t a, int32_t b, int64_t c, int32_t scale)
{
- assert(!(width & ((1 << rst->tile_order) - 1)));
- assert(!(height & ((1 << rst->tile_order) - 1)));
- if (width == 1 << rst->tile_order && height == 1 << rst->tile_order) {
- rst->fill_halfplane(buf, stride, a, b, c, scale);
+ assert(!(width & ((1 << engine->tile_order) - 1)));
+ assert(!(height & ((1 << engine->tile_order) - 1)));
+ if (width == 1 << engine->tile_order && height == 1 << engine->tile_order) {
+ engine->fill_halfplane(buf, stride, a, b, c, scale);
return;
}
uint32_t abs_a = a < 0 ? -a : a;
uint32_t abs_b = b < 0 ? -b : b;
- int64_t size = (int64_t)(abs_a + abs_b) << (rst->tile_order + 5);
- int64_t offs = ((int64_t)a + b) * (1 << (rst->tile_order + 5));
+ int64_t size = (int64_t)(abs_a + abs_b) << (engine->tile_order + 5);
+ int64_t offs = ((int64_t)a + b) * (1 << (engine->tile_order + 5));
int i, j;
- ptrdiff_t step = 1 << rst->tile_order;
- ptrdiff_t tile_stride = stride * (1 << rst->tile_order);
- width >>= rst->tile_order;
- height >>= rst->tile_order;
+ ptrdiff_t step = 1 << engine->tile_order;
+ ptrdiff_t tile_stride = stride * (1 << engine->tile_order);
+ width >>= engine->tile_order;
+ height >>= engine->tile_order;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
- int64_t cc = c - (a * (int64_t)i + b * (int64_t)j) * (1 << (rst->tile_order + 6));
+ int64_t cc = c - (a * (int64_t)i + b * (int64_t)j) * (1 << (engine->tile_order + 6));
int64_t offs_c = offs - cc;
int64_t abs_c = offs_c < 0 ? -offs_c : offs_c;
if (abs_c < size)
- rst->fill_halfplane(buf + i * step, stride, a, b, cc, scale);
+ engine->fill_halfplane(buf + i * step, stride, a, b, cc, scale);
else
- rst->fill_solid(buf + i * step, stride,
- ((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000);
+ engine->fill_solid(buf + i * step, stride,
+ ((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000);
}
buf += tile_stride;
}
@@ -676,18 +677,19 @@ static inline void rasterizer_fill_halfplane(ASS_Rasterizer *rst,
* Rasterizes (possibly recursive) one quad-tree level.
* Truncates used input buffer.
*/
-static int rasterizer_fill_level(ASS_Rasterizer *rst,
- uint8_t *buf, int width, int height, ptrdiff_t stride, int index, size_t offs, int winding)
+static int rasterizer_fill_level(const BitmapEngine *engine, RasterizerData *rst,
+ uint8_t *buf, int width, int height, ptrdiff_t stride,
+ int index, size_t offs, int winding)
{
assert(width > 0 && height > 0);
assert((unsigned)index < 2u && offs <= rst->size[index]);
- assert(!(width & ((1 << rst->tile_order) - 1)));
- assert(!(height & ((1 << rst->tile_order) - 1)));
+ assert(!(width & ((1 << engine->tile_order) - 1)));
+ assert(!(height & ((1 << engine->tile_order) - 1)));
size_t n = rst->size[index] - offs;
struct segment *line = rst->linebuf[index] + offs;
if (!n) {
- rasterizer_fill_solid(rst, buf, width, height, stride, winding);
+ rasterizer_fill_solid(engine, buf, width, height, stride, winding);
return 1;
}
if (n == 1) {
@@ -701,16 +703,16 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst,
if (winding - 1)
flag ^= 3;
if (flag & 1)
- rasterizer_fill_halfplane(rst, buf, width, height, stride,
+ rasterizer_fill_halfplane(engine, buf, width, height, stride,
line->a, line->b, line->c,
flag & 2 ? -line->scale : line->scale);
else
- rasterizer_fill_solid(rst, buf, width, height, stride, flag & 2);
+ rasterizer_fill_solid(engine, buf, width, height, stride, flag & 2);
rst->size[index] = offs;
return 1;
}
- if (width == 1 << rst->tile_order && height == 1 << rst->tile_order) {
- rst->fill_generic(buf, stride, line, rst->size[index] - offs, winding);
+ if (width == 1 << engine->tile_order && height == 1 << engine->tile_order) {
+ engine->fill_generic(buf, stride, line, rst->size[index] - offs, winding);
rst->size[index] = offs;
return 1;
}
@@ -739,21 +741,21 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst,
rst->size[index ^ 0] = dst0 - rst->linebuf[index ^ 0];
rst->size[index ^ 1] = dst1 - rst->linebuf[index ^ 1];
- if (!rasterizer_fill_level(rst, buf, width, height, stride, index ^ 0, offs, winding))
+ if (!rasterizer_fill_level(engine, rst, buf, width, height, stride, index ^ 0, offs, winding))
return 0;
assert(rst->size[index ^ 0] == offs);
- if (!rasterizer_fill_level(rst, buf1, width1, height1, stride, index ^ 1, offs1, winding1))
+ if (!rasterizer_fill_level(engine, rst, buf1, width1, height1, stride, index ^ 1, offs1, winding1))
return 0;
assert(rst->size[index ^ 1] == offs1);
return 1;
}
-int rasterizer_fill(ASS_Rasterizer *rst,
+int rasterizer_fill(const BitmapEngine *engine, RasterizerData *rst,
uint8_t *buf, int x0, int y0, int width, int height, ptrdiff_t stride)
{
assert(width > 0 && height > 0);
- assert(!(width & ((1 << rst->tile_order) - 1)));
- assert(!(height & ((1 << rst->tile_order) - 1)));
+ assert(!(width & ((1 << engine->tile_order) - 1)));
+ assert(!(height & ((1 << engine->tile_order) - 1)));
x0 *= 1 << 6; y0 *= 1 << 6;
size_t n = rst->size[0];
@@ -805,6 +807,6 @@ int rasterizer_fill(ASS_Rasterizer *rst,
}
rst->size[index] = n;
rst->size[index ^ 1] = 0;
- return rasterizer_fill_level(rst, buf, width, height, stride,
+ return rasterizer_fill_level(engine, rst, buf, width, height, stride,
index, 0, winding);
}
diff --git a/libass/ass_rasterizer.h b/libass/ass_rasterizer.h
index d20feb3..73cdba4 100644
--- a/libass/ass_rasterizer.h
+++ b/libass/ass_rasterizer.h
@@ -22,7 +22,7 @@
#include <stddef.h>
#include <stdint.h>
-#include "ass.h"
+#include "ass_bitmap.h"
#include "ass_font.h"
@@ -42,48 +42,25 @@ struct segment {
int32_t x_min, x_max, y_min, y_max;
};
-
-typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride, int set);
-typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride,
- int32_t a, int32_t b, int64_t c, int32_t scale);
-typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride,
- const struct segment *line, size_t n_lines,
- int winding);
-
-void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride, int set);
-void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride, int set);
-void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride,
- int32_t a, int32_t b, int64_t c, int32_t scale);
-void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride,
- int32_t a, int32_t b, int64_t c, int32_t scale);
-void ass_fill_generic_tile16_c(uint8_t *buf, ptrdiff_t stride,
- const struct segment *line, size_t n_lines,
- int winding);
-void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride,
- const struct segment *line, size_t n_lines,
- int winding);
-
-typedef struct ass_rasterizer {
+typedef struct {
int outline_error; // acceptable error (in 1/64 pixel units)
- int tile_order; // log2(tile_size)
- FillSolidTileFunc fill_solid;
- FillHalfplaneTileFunc fill_halfplane;
- FillGenericTileFunc fill_generic;
-
- int32_t x_min, x_max, y_min, y_max; // usable after rasterizer_set_outline
+ // usable after rasterizer_set_outline
+ int32_t x_min, x_max, y_min, y_max;
// internal buffers
struct segment *linebuf[2];
size_t size[2], capacity[2];
-} ASS_Rasterizer;
+} RasterizerData;
+
+void rasterizer_init(RasterizerData *rst, int outline_error);
+void rasterizer_done(RasterizerData *rst);
-void rasterizer_init(ASS_Rasterizer *rst);
-void rasterizer_done(ASS_Rasterizer *rst);
/**
* \brief Convert FreeType outline to polyline and calculate exact bounds
*/
-int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path);
+int rasterizer_set_outline(RasterizerData *rst, const ASS_Outline *path);
+
/**
* \brief Polyline rasterization function
* \param x0, y0, width, height in: source window (full pixel units)
@@ -92,7 +69,8 @@ int rasterizer_set_outline(ASS_Rasterizer *rst, const ASS_Outline *path);
* \return zero on error
* Deletes preprocessed polyline after work.
*/
-int rasterizer_fill(ASS_Rasterizer *rst, uint8_t *buf, int x0, int y0,
+int rasterizer_fill(const BitmapEngine *engine, RasterizerData *rst,
+ uint8_t *buf, int x0, int y0,
int width, int height, ptrdiff_t stride);
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 357e1cc..0995b96 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -34,12 +34,6 @@
#define SUBPIXEL_MASK 63
#define SUBPIXEL_ACCURACY 7
-#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
-
-#include "x86/blend_bitmaps.h"
-#include "x86/rasterizer.h"
-
-#endif // ASM
ASS_Renderer *ass_renderer_init(ASS_Library *library)
{
@@ -70,56 +64,19 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library)
priv->ftlibrary = ft;
// images_root and related stuff is zero-filled in calloc
- #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
- int sse2 = has_sse2();
- int avx2 = has_avx2();
- priv->add_bitmaps_func = avx2 ? ass_add_bitmaps_avx2 :
- (sse2 ? ass_add_bitmaps_sse2 : ass_add_bitmaps_x86);
- #ifdef __x86_64__
- priv->mul_bitmaps_func = avx2 ? ass_mul_bitmaps_avx2 : ass_mul_bitmaps_sse2;
- priv->sub_bitmaps_func = avx2 ? ass_sub_bitmaps_avx2 : ass_sub_bitmaps_sse2;
- #else
- priv->mul_bitmaps_func = mul_bitmaps_c;
- priv->sub_bitmaps_func = ass_sub_bitmaps_x86;
- #endif
- #else
- priv->add_bitmaps_func = add_bitmaps_c;
- priv->sub_bitmaps_func = sub_bitmaps_c;
- priv->mul_bitmaps_func = mul_bitmaps_c;
- #endif
-
-#if CONFIG_RASTERIZER
-#if CONFIG_LARGE_TILES
- priv->rasterizer.tile_order = 5;
- #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
- priv->rasterizer.fill_solid = avx2 ? ass_fill_solid_tile32_avx2 :
- (sse2 ? ass_fill_solid_tile32_sse2 : ass_fill_solid_tile32_c);
- priv->rasterizer.fill_halfplane = avx2 ? ass_fill_halfplane_tile32_avx2 :
- (sse2 ? ass_fill_halfplane_tile32_sse2 : ass_fill_halfplane_tile32_c);
- priv->rasterizer.fill_generic = avx2 ? ass_fill_generic_tile32_avx2 :
- (sse2 ? ass_fill_generic_tile32_sse2 : ass_fill_generic_tile32_c);
- #else
- priv->rasterizer.fill_solid = ass_fill_solid_tile32_c;
- priv->rasterizer.fill_halfplane = ass_fill_halfplane_tile32_c;
- priv->rasterizer.fill_generic = ass_fill_generic_tile32_c;
- #endif
+#if (defined(__i386__) || defined(__x86_64__)) &