summary | refs | log | tree | commit | diff | stats
path: root/libass/ass_bitmap.c
diff options
context:
space:
mode:
author: Dr.Smile <vabnick@gmail.com> 2015-06-26 02:14:21 +0300
committer: Dr.Smile <vabnick@gmail.com> 2015-06-26 02:14:21 +0300
commit: 38a175cad5a894467cb7187ad0187db1ae3f89ff (patch)
tree: a7262be6620f31c2f0d8df26218c6160c5354f56 /libass/ass_bitmap.c
parent: 88f1fd37fe5785851f069e3dc6939f1ab905f4bc (diff)
download: libass-38a175cad5a894467cb7187ad0187db1ae3f89ff.tar.bz2
download: libass-38a175cad5a894467cb7187ad0187db1ae3f89ff.tar.xz
Switch to virtual function table
Use a single pointer to a table of functions instead of a scattered bunch of function pointers. Different versions of these tables can be constructed at compile time. Also, bitmap memory alignment now depends only on SSE2/AVX2 support and is constant for every width. That simplifies the code without a noticeable performance penalty.
Diffstat (limited to 'libass/ass_bitmap.c')
-rw-r--r--  libass/ass_bitmap.c  126
1 files changed, 66 insertions, 60 deletions
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index aa92d50..827c721 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -33,10 +33,30 @@
#include "ass_bitmap.h"
#include "ass_render.h"
+
+#define ALIGN C_ALIGN_ORDER
+#define DECORATE(func) ass_##func##_c
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
-#include "x86/be_blur.h"
+
+#define ALIGN 4
+#define DECORATE(func) ass_##func##_sse2
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
+#define ALIGN 5
+#define DECORATE(func) ass_##func##_avx2
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
#endif
+
static const unsigned base = 256;
struct ass_synth_priv {
@@ -51,8 +71,6 @@ struct ass_synth_priv {
unsigned *gt2;
double radius;
-
- BEBlurFunc be_blur_func;
};
static bool generate_tables(ASS_SynthPriv *priv, double radius)
@@ -137,7 +155,8 @@ static bool resize_tmp(ASS_SynthPriv *priv, int w, int h)
return !!priv->tmp;
}
-void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
+void ass_synth_blur(const BitmapEngine *engine,
+ ASS_SynthPriv *priv_blur, int opaque_box, int be,
double blur_radius, Bitmap *bm_g, Bitmap *bm_o)
{
if(blur_radius > 0.0 || be){
@@ -175,20 +194,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
be_blur_pre(buf, w, h, stride);
while(--passes){
memset(tmp, 0, stride * 2);
- if(w < 16){
- be_blur_c(buf, w, h, stride, tmp);
- }else{
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
- }
+ engine->be_blur(buf, w, h, stride, tmp);
}
be_blur_post(buf, w, h, stride);
}
memset(tmp, 0, stride * 2);
- if(w < 16){
- be_blur_c(buf, w, h, stride, tmp);
- }else{
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
- }
+ engine->be_blur(buf, w, h, stride, tmp);
}
}
if (!bm_o || opaque_box) {
@@ -202,12 +213,12 @@ void ass_synth_blur(ASS_SynthPriv *priv_blur, int opaque_box, int be,
be_blur_pre(buf, w, h, stride);
while(--passes){
memset(tmp, 0, stride * 2);
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
+ engine->be_blur(buf, w, h, stride, tmp);
}
be_blur_post(buf, w, h, stride);
}
memset(tmp, 0, stride * 2);
- priv_blur->be_blur_func(buf, w, h, stride, tmp);
+ engine->be_blur(buf, w, h, stride, tmp);
}
}
}
@@ -220,16 +231,6 @@ ASS_SynthPriv *ass_synth_init(double radius)
free(priv);
return NULL;
}
- #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
- int avx2 = has_avx2();
- #ifdef __x86_64__
- priv->be_blur_func = avx2 ? ass_be_blur_avx2 : ass_be_blur_sse2;
- #else
- priv->be_blur_func = be_blur_c;
- #endif
- #else
- priv->be_blur_func = be_blur_c;
- #endif
return priv;
}
@@ -242,36 +243,42 @@ void ass_synth_done(ASS_SynthPriv *priv)
free(priv);
}
-static Bitmap *alloc_bitmap_raw(int w, int h)
+static bool alloc_bitmap_buffer(const BitmapEngine *engine, Bitmap *bm, int w, int h)
{
- Bitmap *bm;
-
- unsigned align = (w >= 32) ? 32 : ((w >= 16) ? 16 : 1);
+ unsigned align = 1 << engine->align_order;
size_t s = ass_align(align, w);
// Too often we use ints as offset for bitmaps => use INT_MAX.
if (s > (INT_MAX - 32) / FFMAX(h, 1))
- return NULL;
- bm = malloc(sizeof(Bitmap));
+ return false;
+ uint8_t *buf = ass_aligned_alloc(align, s * h + 32);
+ if (!buf)
+ return false;
+ bm->w = w;
+ bm->h = h;
+ bm->stride = s;
+ bm->buffer = buf;
+ return true;
+}
+
+static Bitmap *alloc_bitmap_raw(const BitmapEngine *engine, int w, int h)
+{
+ Bitmap *bm = malloc(sizeof(Bitmap));
if (!bm)
return NULL;
- bm->buffer = ass_aligned_alloc(align, s * h + 32);
- if (!bm->buffer) {
+ if (!alloc_bitmap_buffer(engine, bm, w, h)) {
free(bm);
return NULL;
}
- bm->w = w;
- bm->h = h;
- bm->stride = s;
- bm->left = bm->top = 0;
return bm;
}
-Bitmap *alloc_bitmap(int w, int h)
+Bitmap *alloc_bitmap(const BitmapEngine *engine, int w, int h)
{
- Bitmap *bm = alloc_bitmap_raw(w, h);
+ Bitmap *bm = alloc_bitmap_raw(engine, w, h);
if(!bm)
return NULL;
memset(bm->buffer, 0, bm->stride * bm->h + 32);
+ bm->left = bm->top = 0;
return bm;
}
@@ -282,9 +289,9 @@ void ass_free_bitmap(Bitmap *bm)
free(bm);
}
-Bitmap *copy_bitmap(const Bitmap *src)
+Bitmap *copy_bitmap(const BitmapEngine *engine, const Bitmap *src)
{
- Bitmap *dst = alloc_bitmap_raw(src->w, src->h);
+ Bitmap *dst = alloc_bitmap_raw(engine, src->w, src->h);
if (!dst)
return NULL;
dst->left = src->left;
@@ -298,7 +305,7 @@ Bitmap *copy_bitmap(const Bitmap *src)
Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
ASS_Outline *outline, int bord)
{
- ASS_Rasterizer *rst = &render_priv->rasterizer;
+ RasterizerData *rst = &render_priv->rasterizer;
if (!rasterizer_set_outline(rst, outline)) {
ass_msg(render_priv->library, MSGL_WARN, "Failed to process glyph outline!\n");
return NULL;
@@ -308,7 +315,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
return NULL;
if (rst->x_min >= rst->x_max || rst->y_min >= rst->y_max) {
- Bitmap *bm = alloc_bitmap(2 * bord, 2 * bord);
+ Bitmap *bm = alloc_bitmap(render_priv->engine, 2 * bord, 2 * bord);
if (!bm)
return NULL;
bm->left = bm->top = -bord;
@@ -325,7 +332,7 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
int w = x_max - x_min;
int h = y_max - y_min;
- int mask = (1 << rst->tile_order) - 1;
+ int mask = (1 << render_priv->engine->tile_order) - 1;
if (w < 0 || h < 0 || w > 8000000 / FFMAX(h, 1) ||
w > INT_MAX - (2 * bord + mask) || h > INT_MAX - (2 * bord + mask)) {
@@ -336,13 +343,13 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv,
int tile_w = (w + 2 * bord + mask) & ~mask;
int tile_h = (h + 2 * bord + mask) & ~mask;
- Bitmap *bm = alloc_bitmap_raw(tile_w, tile_h);
+ Bitmap *bm = alloc_bitmap_raw(render_priv->engine, tile_w, tile_h);
if (!bm)
return NULL;
bm->left = x_min - bord;
bm->top = y_min - bord;
- if (!rasterizer_fill(rst, bm->buffer,
+ if (!rasterizer_fill(render_priv->engine, rst, bm->buffer,
x_min - bord, y_min - bord,
bm->stride, tile_h, bm->stride)) {
ass_msg(render_priv->library, MSGL_WARN, "Failed to rasterize glyph!\n");
@@ -651,9 +658,8 @@ void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2,
* This blur is the same as the one employed by vsfilter.
* Pure C implementation.
*/
-void be_blur_c(uint8_t *buf, intptr_t w,
- intptr_t h, intptr_t stride,
- uint16_t *tmp)
+void ass_be_blur_c(uint8_t *buf, intptr_t w, intptr_t h,
+ intptr_t stride, uint16_t *tmp)
{
uint16_t *col_pix_buf = tmp;
uint16_t *col_sum_buf = tmp + w;
@@ -800,9 +806,9 @@ int outline_to_bitmap2(ASS_Renderer *render_priv,
* \brief Add two bitmaps together at a given position
* Uses additive blending, clipped to [0,255]. Pure C implementation.
*/
-void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+void ass_add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width)
{
unsigned out;
uint8_t* end = dst + dst_stride * height;
@@ -816,9 +822,9 @@ void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
}
}
-void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+void ass_sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src, intptr_t src_stride,
+ intptr_t height, intptr_t width)
{
short out;
uint8_t* end = dst + dst_stride * height;
@@ -832,10 +838,10 @@ void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
}
}
-void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
- uint8_t *src1, intptr_t src1_stride,
- uint8_t *src2, intptr_t src2_stride,
- intptr_t w, intptr_t h)
+void ass_mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+ uint8_t *src1, intptr_t src1_stride,
+ uint8_t *src2, intptr_t src2_stride,
+ intptr_t w, intptr_t h)
{
uint8_t* end = src1 + src1_stride * h;
while (src1 < end) {