Diffstat (limited to 'libass/ass_rasterizer_c.c')
-rw-r--r--  libass/ass_rasterizer_c.c  |  382
1 file changed, 382 insertions(+), 0 deletions(-)
diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c
new file mode 100644
index 0000000..8993ed6
--- /dev/null
+++ b/libass/ass_rasterizer_c.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2014 Vabishchevich Nikolay <vabnick@gmail.com>
+ *
+ * This file is part of libass.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "ass_utils.h"
+#include "ass_rasterizer.h"
+#include <assert.h>
+
+
+
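+/*
+ * Solid Filling Functions
+ *
+ * Fill the whole tile with fully opaque pixels;
+ * stride is the distance in bytes between successive rows of buf.
+ */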
+void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride)
+{
+ int i, j;
+ uint8_t value = 255;
+ for (j = 0; j < 16; ++j) {
+ for (i = 0; i < 16; ++i)
+ buf[i] = value;
+ buf += stride;
+ }
+}
+
+void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride)
+{
+ int i, j;
+ uint8_t value = 255;
+ for (j = 0; j < 32; ++j) {
+ for (i = 0; i < 32; ++i)
+ buf[i] = value;
+ buf += stride;
+ }
+}
+
+
+/*
+ * Halfplane Filling Functions
+ *
+ * Fill pixels, with antialiasing, according to the equation
+ * A * x + B * y < C, where
+ * x, y are offsets of the pixel center from the bottom-left corner of the tile,
+ * A = a * scale, B = b * scale, C = c * scale / 64.
+ *
+ * Normalization of the coefficients prior to the call:
+ * max(abs(a), abs(b)) * scale = 1 << 61.
+ *
+ * Algorithm:
+ * Let
+ * max_ab = max(abs(A), abs(B)),
+ * min_ab = min(abs(A), abs(B)),
+ * CC = C - A * x - B * y, then
+ * result = (clamp((CC - min_ab / 4) / max_ab) +
+ *           clamp((CC + min_ab / 4) / max_ab) +
+ *           1) / 2,
+ * where clamp(Z) = max(-0.5, min(0.5, Z)).
+ */
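+
+/*
+ * For example, with A = 1, B = 0 (any common scale factor of A, B and C
+ * cancels in the formula) we get max_ab = 1, min_ab = 0 and CC = C - x, so
+ * result = clamp(C - x) + 1/2:
+ * a pixel whose center lies exactly on the boundary receives coverage 1/2,
+ * and a pixel whose center lies half a pixel or more inside the halfplane
+ * is fully covered.
+ */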
+
+void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride,
+ int32_t a, int32_t b, int64_t c, int32_t scale)
+{
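+ // Reduce a, b, c to compact 16-bit fixed point (safe because of the
+ // normalization described above), rounding to nearest; cc is then shifted
+ // by half a pixel in x and y, since x and y refer to pixel centers, and
+ // biased so that the clamped sums below map onto the 0..255 output range.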
+ int16_t aa = (a * (int64_t)scale + ((int64_t)1 << 49)) >> 50;
+ int16_t bb = (b * (int64_t)scale + ((int64_t)1 << 49)) >> 50;
+ int16_t cc = ((int32_t)(c >> 11) * (int64_t)scale + ((int64_t)1 << 44)) >> 45;
+ cc += (1 << 9) - ((aa + bb) >> 1);
+
+ int16_t abs_a = aa < 0 ? -aa : aa;
+ int16_t abs_b = bb < 0 ? -bb : bb;
+ int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2;
+
+ int i, j;
+ int16_t va1[16], va2[16];
+ for (i = 0; i < 16; ++i) {
+ va1[i] = aa * i - delta;
+ va2[i] = aa * i + delta;
+ }
+
+ static const int16_t full = (1 << 10) - 1;
+ for (j = 0; j < 16; ++j) {
+ for (i = 0; i < 16; ++i) {
+ int16_t c1 = cc - va1[i];
+ int16_t c2 = cc - va2[i];
+ c1 = FFMINMAX(c1, 0, full);
+ c2 = FFMINMAX(c2, 0, full);
+ buf[i] = (c1 + c2) >> 3;
+ }
+ buf += stride;
+ cc -= bb;
+ }
+}
+
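+// Same as ass_fill_halfplane_tile16_c, but for a 32x32 tile: the coefficients
+// are reduced by one extra bit and the clamping range is halved so that the
+// 16-bit intermediate values keep enough headroom.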
+void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride,
+ int32_t a, int32_t b, int64_t c, int32_t scale)
+{
+ int16_t aa = (a * (int64_t)scale + ((int64_t)1 << 50)) >> 51;
+ int16_t bb = (b * (int64_t)scale + ((int64_t)1 << 50)) >> 51;
+ int16_t cc = ((int32_t)(c >> 12) * (int64_t)scale + ((int64_t)1 << 44)) >> 45;
+ cc += (1 << 8) - ((aa + bb) >> 1);
+
+ int16_t abs_a = aa < 0 ? -aa : aa;
+ int16_t abs_b = bb < 0 ? -bb : bb;
+ int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2;
+
+ int i, j;
+ int16_t va1[32], va2[32];
+ for (i = 0; i < 32; ++i) {
+ va1[i] = aa * i - delta;
+ va2[i] = aa * i + delta;
+ }
+
+ static const int16_t full = (1 << 9) - 1;
+ for (j = 0; j < 32; ++j) {
+ for (i = 0; i < 32; ++i) {
+ int16_t c1 = cc - va1[i];
+ int16_t c2 = cc - va2[i];
+ c1 = FFMINMAX(c1, 0, full);
+ c2 = FFMINMAX(c2, 0, full);
+ buf[i] = (c1 + c2) >> 2;
+ }
+ buf += stride;
+ cc -= bb;
+ }
+}
+
+
+/*
+ * Generic Filling Functions
+ *
+ * Algorithm:
+ * Construct a trapezoid from each polyline segment and its projection onto
+ * the left side of the tile.
+ * Render that trapezoid into an internal buffer with additive blending and
+ * the correct sign.
+ * Store the clamped absolute value of the internal buffer into the result
+ * buffer.
+ */
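+
+/*
+ * In the code below, res[][] is that internal buffer, delta[] accumulates the
+ * per-row contributions of the segments' projections onto the left border of
+ * the tile, and the final loop combines both with the caller's winding and
+ * writes the clamped absolute value of the sum into buf.
+ */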
+
+// Render top/bottom line of the trapezium with antialiasing
+static inline void update_border_line16(int16_t res[16],
+ int16_t abs_a, const int16_t va[16],
+ int16_t b, int16_t abs_b,
+ int16_t c, int dn, int up)
+{
+ int16_t size = up - dn;
+ int16_t w = (1 << 10) + (size << 4) - abs_a;
+ w = FFMIN(w, 1 << 10) << 3;
+
+ int16_t dc_b = abs_b * (int32_t)size >> 6;
+ int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2;
+
+ int16_t base = (int32_t)b * (int16_t)(dn + up) >> 7;
+ int16_t offs1 = size - ((base + dc) * (int32_t)w >> 16);
+ int16_t offs2 = size - ((base - dc) * (int32_t)w >> 16);
+
+ int i;
+ size <<= 1;
+ for (i = 0; i < 16; ++i) {
+ int16_t cw = (c - va[i]) * (int32_t)w >> 16;
+ int16_t c1 = cw + offs1;
+ int16_t c2 = cw + offs2;
+ c1 = FFMINMAX(c1, 0, size);
+ c2 = FFMINMAX(c2, 0, size);
+ res[i] += c1 + c2;
+ }
+}
+
+void ass_fill_generic_tile16_c(uint8_t *buf, ptrdiff_t stride,
+ const struct segment *line, size_t n_lines,
+ int winding)
+{
+ int i, j;
+ int16_t res[16][16], delta[18];
+ for (j = 0; j < 16; ++j)
+ for (i = 0; i < 16; ++i)
+ res[j][i] = 0;
+ for (j = 0; j < 18; ++j)
+ delta[j] = 0;
+
+ static const int16_t full = 1 << 10;
+ const struct segment *end = line + n_lines;
+ for (; line != end; ++line) {
+ assert(line->y_min >= 0 && line->y_min < 1 << 10);
+ assert(line->y_max > 0 && line->y_max <= 1 << 10);
+ assert(line->y_min <= line->y_max);
+
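+ // Record how this segment changes the row-constant (left border) part of
+ // the coverage: dn_delta/up_delta are 0 or 4 depending on the segment
+ // direction and the SEGFLAG_* flags, and the fractional row positions
+ // dn_pos/up_pos split each endpoint's contribution between two adjacent
+ // delta[] entries.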
+ int16_t dn_delta = line->flags & SEGFLAG_UP ? 4 : 0;
+ int16_t up_delta = dn_delta;
+ if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) up_delta ^= 4;
+ if (line->flags & SEGFLAG_UR_DL) {
+ int16_t tmp = dn_delta;
+ dn_delta = up_delta;
+ up_delta = tmp;
+ }
+
+ int dn = line->y_min >> 6, up = line->y_max >> 6;
+ int16_t dn_pos = line->y_min & 63;
+ int16_t dn_delta1 = dn_delta * dn_pos;
+ int16_t up_pos = line->y_max & 63;
+ int16_t up_delta1 = up_delta * up_pos;
+ delta[dn + 1] -= dn_delta1;
+ delta[dn] -= (dn_delta << 6) - dn_delta1;
+ delta[up + 1] += up_delta1;
+ delta[up] += (up_delta << 6) - up_delta1;
+ if (line->y_min == line->y_max)
+ continue;
+
+ int16_t a = (line->a * (int64_t)line->scale + ((int64_t)1 << 49)) >> 50;
+ int16_t b = (line->b * (int64_t)line->scale + ((int64_t)1 << 49)) >> 50;
+ int16_t c = ((int32_t)(line->c >> 11) * (int64_t)line->scale + ((int64_t)1 << 44)) >> 45;
+ c -= (a >> 1) + b * dn;
+
+ int16_t va[16];
+ for (i = 0; i < 16; ++i)
+ va[i] = a * i;
+ int16_t abs_a = a < 0 ? -a : a;
+ int16_t abs_b = b < 0 ? -b : b;
+ int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2;
+ int16_t base = (1 << 9) - (b >> 1);
+ int16_t dc1 = base + dc;
+ int16_t dc2 = base - dc;
+
+ if (dn_pos) {
+ if (up == dn) {
+ update_border_line16(res[dn], abs_a, va, b, abs_b, c, dn_pos, up_pos);
+ continue;
+ }
+ update_border_line16(res[dn], abs_a, va, b, abs_b, c, dn_pos, 64);
+ dn++;
+ c -= b;
+ }
+ for (j = dn; j < up; ++j) {
+ for (i = 0; i < 16; ++i) {
+ int16_t c1 = c - va[i] + dc1;
+ int16_t c2 = c - va[i] + dc2;
+ c1 = FFMINMAX(c1, 0, full);
+ c2 = FFMINMAX(c2, 0, full);
+ res[j][i] += (c1 + c2) >> 3;
+ }
+ c -= b;
+ }
+ if (up_pos)
+ update_border_line16(res[up], abs_a, va, b, abs_b, c, 0, up_pos);
+ }
+
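+ // Final pass: integrate the per-row deltas together with the caller's
+ // winding (scaled by 1 << 8) and store the clamped absolute value of the
+ // signed coverage.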
+ int16_t cur = winding << 8;
+ for (j = 0; j < 16; ++j) {
+ cur += delta[j];
+ for (i = 0; i < 16; ++i) {
+ int16_t val = res[j][i] + cur, neg_val = -val;
+ val = (val > neg_val ? val : neg_val);
+ buf[i] = FFMIN(val, 255);
+ }
+ buf += stride;
+ }
+}
+
+// Render top/bottom line of the trapezium with antialiasing
+static inline void update_border_line32(int16_t res[32],
+ int16_t abs_a, const int16_t va[32],
+ int16_t b, int16_t abs_b,
+ int16_t c, int dn, int up)
+{
+ int16_t size = up - dn;
+ int16_t w = (1 << 9) + (size << 3) - abs_a;
+ w = FFMIN(w, 1 << 9) << 5;
+
+ int16_t dc_b = abs_b * (int32_t)size >> 6;
+ int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2;
+
+ int16_t base = (int32_t)b * (int16_t)(dn + up) >> 7;
+ int16_t offs1 = size - ((base + dc) * (int32_t)w >> 16);
+ int16_t offs2 = size - ((base - dc) * (int32_t)w >> 16);
+
+ int i;
+ size <<= 1;
+ for (i = 0; i < 32; ++i) {
+ int16_t cw = (c - va[i]) * (int32_t)w >> 16;
+ int16_t c1 = cw + offs1;
+ int16_t c2 = cw + offs2;
+ c1 = FFMINMAX(c1, 0, size);
+ c2 = FFMINMAX(c2, 0, size);
+ res[i] += c1 + c2;
+ }
+}
+
+void ass_fill_generic_tile32_c(uint8_t *buf, ptrdiff_t stride,
+ const struct segment *line, size_t n_lines,
+ int winding)
+{
+ int i, j;
+ int16_t res[32][32], delta[34];
+ for (j = 0; j < 32; ++j)
+ for (i = 0; i < 32; ++i)
+ res[j][i] = 0;
+ for (j = 0; j < 34; ++j)
+ delta[j] = 0;
+
+ static const int16_t full = 1 << 9;
+ const struct segment *end = line + n_lines;
+ for (; line != end; ++line) {
+ assert(line->y_min >= 0 && line->y_min < 1 << 11);
+ assert(line->y_max > 0 && line->y_max <= 1 << 11);
+ assert(line->y_min <= line->y_max);
+
+ int16_t dn_delta = line->flags & SEGFLAG_UP ? 4 : 0;
+ int16_t up_delta = dn_delta;
+ if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) up_delta ^= 4;
+ if (line->flags & SEGFLAG_UR_DL) {
+ int16_t tmp = dn_delta;
+ dn_delta = up_delta;
+ up_delta = tmp;
+ }
+
+ int dn = line->y_min >> 6, up = line->y_max >> 6;
+ int16_t dn_pos = line->y_min & 63;
+ int16_t dn_delta1 = dn_delta * dn_pos;
+ int16_t up_pos = line->y_max & 63;
+ int16_t up_delta1 = up_delta * up_pos;
+ delta[dn + 1] -= dn_delta1;
+ delta[dn] -= (dn_delta << 6) - dn_delta1;
+ delta[up + 1] += up_delta1;
+ delta[up] += (up_delta << 6) - up_delta1;
+ if (line->y_min == line->y_max)
+ continue;
+
+ int16_t a = (line->a * (int64_t)line->scale + ((int64_t)1 << 50)) >> 51;
+ int16_t b = (line->b * (int64_t)line->scale + ((int64_t)1 << 50)) >> 51;
+ int16_t c = ((int32_t)(line->c >> 12) * (int64_t)line->scale + ((int64_t)1 << 44)) >> 45;
+ c -= (a >> 1) + b * dn;
+
+ int16_t va[32];
+ for (i = 0; i < 32; ++i)
+ va[i] = a * i;
+ int16_t abs_a = a < 0 ? -a : a;
+ int16_t abs_b = b < 0 ? -b : b;
+ int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2;
+ int16_t base = (1 << 8) - (b >> 1);
+ int16_t dc1 = base + dc;
+ int16_t dc2 = base - dc;
+
+ if (dn_pos) {
+ if (up == dn) {
+ update_border_line32(res[dn], abs_a, va, b, abs_b, c, dn_pos, up_pos);
+ continue;
+ }
+ update_border_line32(res[dn], abs_a, va, b, abs_b, c, dn_pos, 64);
+ dn++;
+ c -= b;
+ }
+ for (j = dn; j < up; ++j) {
+ for (i = 0; i < 32; ++i) {
+ int16_t c1 = c - va[i] + dc1;
+ int16_t c2 = c - va[i] + dc2;
+ c1 = FFMINMAX(c1, 0, full);
+ c2 = FFMINMAX(c2, 0, full);
+ res[j][i] += (c1 + c2) >> 2;
+ }
+ c -= b;
+ }
+ if (up_pos)
+ update_border_line32(res[up], abs_a, va, b, abs_b, c, 0, up_pos);
+ }
+
+ int16_t cur = winding << 8;
+ for (j = 0; j < 32; ++j) {
+ cur += delta[j];
+ for (i = 0; i < 32; ++i) {
+ int16_t val = res[j][i] + cur, neg_val = -val;
+ val = (val > neg_val ? val : neg_val);
+ buf[i] = FFMIN(val, 255);
+ }
+ buf += stride;
+ }
+}