summaryrefslogtreecommitdiffstats
path: root/libass
diff options
context:
space:
mode:
Diffstat (limited to 'libass')
-rw-r--r--libass/Makefile.am63
-rw-r--r--libass/Makefile_library.am75
-rw-r--r--libass/aarch64/asm.S280
-rw-r--r--libass/aarch64/be_blur.S150
-rw-r--r--libass/aarch64/blend_bitmaps.S162
-rw-r--r--libass/aarch64/blur.S485
-rw-r--r--libass/aarch64/rasterizer.S472
-rw-r--r--libass/ass.c355
-rw-r--r--libass/ass.h132
-rw-r--r--libass/ass/meson.build13
-rw-r--r--libass/ass_bitmap.c282
-rw-r--r--libass/ass_bitmap.h88
-rw-r--r--libass/ass_bitmap_engine.c204
-rw-r--r--libass/ass_bitmap_engine.h99
-rw-r--r--libass/ass_blur.c407
-rw-r--r--libass/ass_cache.c28
-rw-r--r--libass/ass_cache.h3
-rw-r--r--libass/ass_cache_template.h9
-rw-r--r--libass/ass_compat.h17
-rw-r--r--libass/ass_coretext.c261
-rw-r--r--libass/ass_coretext.h2
-rw-r--r--libass/ass_directwrite.c688
-rw-r--r--libass/ass_directwrite.h2
-rw-r--r--libass/ass_directwrite_info_template.h55
-rw-r--r--libass/ass_drawing.c315
-rw-r--r--libass/ass_drawing.h1
-rw-r--r--libass/ass_filesystem.c411
-rw-r--r--libass/ass_filesystem.h44
-rw-r--r--libass/ass_font.c697
-rw-r--r--libass/ass_font.h22
-rw-r--r--libass/ass_fontconfig.c19
-rw-r--r--libass/ass_fontconfig.h2
-rw-r--r--libass/ass_fontselect.c557
-rw-r--r--libass/ass_fontselect.h70
-rw-r--r--libass/ass_func_template.h130
-rw-r--r--libass/ass_library.c27
-rw-r--r--libass/ass_library.h6
-rw-r--r--libass/ass_outline.c366
-rw-r--r--libass/ass_outline.h55
-rw-r--r--libass/ass_parse.c638
-rw-r--r--libass/ass_parse.h20
-rw-r--r--libass/ass_priv.h9
-rw-r--r--libass/ass_rasterizer.c23
-rw-r--r--libass/ass_rasterizer.h14
-rw-r--r--libass/ass_rasterizer_c.c377
-rw-r--r--libass/ass_render.c1449
-rw-r--r--libass/ass_render.h66
-rw-r--r--libass/ass_render_api.c31
-rw-r--r--libass/ass_shaper.c434
-rw-r--r--libass/ass_shaper.h11
-rw-r--r--libass/ass_types.h178
-rw-r--r--libass/ass_utils.c244
-rw-r--r--libass/ass_utils.h99
-rw-r--r--libass/c/blur_template.h343
-rw-r--r--libass/c/c_be_blur.c73
-rw-r--r--libass/c/c_blend_bitmaps.c75
-rw-r--r--libass/c/c_blur.c74
-rw-r--r--libass/c/c_rasterizer.c33
-rw-r--r--libass/c/rasterizer_template.h236
-rw-r--r--libass/dwrite_c.h373
-rw-r--r--libass/meson.build149
-rw-r--r--libass/wyhash.h268
-rw-r--r--libass/x86/be_blur.asm426
-rw-r--r--libass/x86/blend_bitmaps.asm466
-rw-r--r--libass/x86/blur.asm308
-rw-r--r--libass/x86/cpuid.asm26
-rw-r--r--libass/x86/cpuid.h5
-rw-r--r--libass/x86/gaussian.asm0
-rw-r--r--libass/x86/rasterizer.asm424
-rw-r--r--libass/x86/utils.asm15
-rw-r--r--libass/x86/x86inc.asm309
71 files changed, 9484 insertions, 4766 deletions
diff --git a/libass/Makefile.am b/libass/Makefile.am
deleted file mode 100644
index ac3c545..0000000
--- a/libass/Makefile.am
+++ /dev/null
@@ -1,63 +0,0 @@
-AM_CFLAGS = -std=gnu99 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter \
- -Werror-implicit-function-declaration -Wstrict-prototypes \
- -Wpointer-arith -Wredundant-decls -Wno-missing-field-initializers\
- -D_GNU_SOURCE
-
-LIBASS_LT_CURRENT = 10
-LIBASS_LT_REVISION = 1
-LIBASS_LT_AGE = 1
-
-nasm_verbose = $(nasm_verbose_$(V))
-nasm_verbose_ = $(nasm_verbose_$(AM_DEFAULT_VERBOSITY))
-nasm_verbose_0 = @echo " NASM " $@;
-
-.asm.lo:
- $(nasm_verbose)$(LIBTOOL) $(AM_V_lt) --tag=CC --mode=compile $(AS) $(ASFLAGS) -I$(srcdir)/ -o $@ $< -prefer-non-pic
-
-SRC_INTEL = x86/rasterizer.asm x86/blend_bitmaps.asm x86/blur.asm x86/cpuid.asm \
- x86/cpuid.h
-SRC_INTEL64 = x86/be_blur.asm
-
-SRC_FONTCONFIG = ass_fontconfig.c ass_fontconfig.h
-SRC_DIRECTWRITE = ass_directwrite.c ass_directwrite.h dwrite_c.h
-SRC_CORETEXT = ass_coretext.c ass_coretext.h
-
-lib_LTLIBRARIES = libass.la
-libass_la_SOURCES = ass.h ass.c ass_types.h ass_utils.h ass_utils.c \
- ass_compat.h ass_string.h ass_string.c ass_strtod.c \
- ass_library.h ass_library.c ass_cache.h ass_cache.c ass_cache_template.h \
- ass_font.h ass_font.c ass_fontselect.h ass_fontselect.c \
- ass_render.h ass_render.c ass_render_api.c \
- ass_parse.h ass_parse.c ass_priv.h ass_shaper.h ass_shaper.c \
- ass_outline.h ass_outline.c ass_drawing.h ass_drawing.c \
- ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c \
- ass_bitmap.h ass_bitmap.c ass_blur.c ass_func_template.h
-
-libass_la_LDFLAGS = -no-undefined -version-info $(LIBASS_LT_CURRENT):$(LIBASS_LT_REVISION):$(LIBASS_LT_AGE)
-libass_la_LDFLAGS += -export-symbols $(srcdir)/libass.sym
-
-if FONTCONFIG
-libass_la_SOURCES += $(SRC_FONTCONFIG)
-endif
-
-if DIRECTWRITE
-libass_la_SOURCES += $(SRC_DIRECTWRITE)
-endif
-
-if CORETEXT
-libass_la_SOURCES += $(SRC_CORETEXT)
-endif
-
-if ASM
-if INTEL
-libass_la_SOURCES += $(SRC_INTEL)
-if X64
-libass_la_SOURCES += $(SRC_INTEL64)
-endif
-endif
-endif
-
-assheadersdir = $(includedir)/ass
-dist_assheaders_HEADERS = ass.h ass_types.h
-
-EXTRA_DIST = libass.sym x86/x86inc.asm x86/utils.asm
diff --git a/libass/Makefile_library.am b/libass/Makefile_library.am
new file mode 100644
index 0000000..536d775
--- /dev/null
+++ b/libass/Makefile_library.am
@@ -0,0 +1,75 @@
+# Libtool version triple; combined as CURRENT:REVISION:AGE in the
+# -version-info flag of libass_libass_la_LDFLAGS.
+LIBASS_LT_CURRENT = 11
+LIBASS_LT_REVISION = 1
+LIBASS_LT_AGE = 2
+
+# Suffix rule: build a libtool object (.lo) from a .asm source by running
+# $(AS) through libtool in compile mode, invoked via the ltnasm.sh script.
+# -I adds the libass/ source directory to the assembler include path and
+# -Dprivate_prefix=ass defines private_prefix for the assembly sources.
+# NOTE(review): nasm_verbose is not defined in this fragment; presumably the
+# including Makefile.am provides it -- confirm.
+.asm.lo:
+ $(nasm_verbose)$(LIBTOOL) $(AM_V_lt) --tag=CC --mode=compile $(top_srcdir)/ltnasm.sh $(AS) $(ASFLAGS) -I$(top_srcdir)/libass/ -Dprivate_prefix=ass -o $@ $<
+
+# Register libass/libass.la as a libtool library and list the sources and
+# headers that are always part of it (all paths carry the libass/ prefix).
+# NOTE(review): lib_LTLIBRARIES is appended to, so it is presumably
+# initialised by the Makefile.am that includes this fragment -- confirm.
+lib_LTLIBRARIES += libass/libass.la
+libass_libass_la_SOURCES = \
+ libass/ass_utils.h libass/ass_utils.c \
+ libass/ass_string.h libass/ass_string.c \
+ libass/ass_compat.h libass/ass_strtod.c \
+ libass/ass_filesystem.h libass/ass_filesystem.c \
+ libass/ass_types.h libass/ass.h libass/ass_priv.h libass/ass.c \
+ libass/ass_library.h libass/ass_library.c \
+ libass/ass_cache_template.h libass/ass_cache.h libass/ass_cache.c \
+ libass/ass_font.h libass/ass_font.c \
+ libass/ass_fontselect.h libass/ass_fontselect.c \
+ libass/ass_parse.h libass/ass_parse.c \
+ libass/ass_shaper.h libass/ass_shaper.c \
+ libass/ass_outline.h libass/ass_outline.c \
+ libass/ass_drawing.h libass/ass_drawing.c \
+ libass/ass_bitmap.h libass/ass_bitmap.c libass/ass_blur.c \
+ libass/ass_rasterizer.h libass/ass_rasterizer.c \
+ libass/ass_render.h libass/ass_render.c libass/ass_render_api.c \
+ libass/ass_bitmap_engine.h libass/ass_bitmap_engine.c \
+ libass/c/rasterizer_template.h libass/c/c_rasterizer.c \
+ libass/c/c_blend_bitmaps.c \
+ libass/c/c_be_blur.c \
+ libass/c/blur_template.h libass/c/c_blur.c \
+ libass/wyhash.h
+
+# Assembly sources, added only when the ASM conditional is enabled:
+# the x86/*.asm files under the X86 conditional and the aarch64/*.S files
+# under the AARCH64 conditional.
+if ASM
+if X86
+libass_libass_la_SOURCES += \
+ libass/x86/rasterizer.asm \
+ libass/x86/blend_bitmaps.asm \
+ libass/x86/be_blur.asm \
+ libass/x86/blur.asm \
+ libass/x86/cpuid.h libass/x86/cpuid.asm
+endif
+if AARCH64
+libass_libass_la_SOURCES += \
+ libass/aarch64/rasterizer.S \
+ libass/aarch64/blend_bitmaps.S \
+ libass/aarch64/be_blur.S \
+ libass/aarch64/blur.S \
+ libass/aarch64/asm.S
+endif
+endif
+
+# Optional sources: each group is compiled only when its Automake
+# conditional (FONTCONFIG, DIRECTWRITE, CORETEXT) is enabled.
+if FONTCONFIG
+libass_libass_la_SOURCES += libass/ass_fontconfig.h libass/ass_fontconfig.c
+endif
+
+if DIRECTWRITE
+libass_libass_la_SOURCES += \
+ libass/dwrite_c.h \
+ libass/ass_directwrite_info_template.h \
+ libass/ass_directwrite.h libass/ass_directwrite.c
+endif
+
+if CORETEXT
+libass_libass_la_SOURCES += libass/ass_coretext.h libass/ass_coretext.c
+endif
+
+# Link flags: -no-undefined, the libtool version triple built from the
+# LIBASS_LT_* variables, and an export list taken from libass/libass.sym.
+libass_libass_la_LDFLAGS = -no-undefined -version-info $(LIBASS_LT_CURRENT):$(LIBASS_LT_REVISION):$(LIBASS_LT_AGE)
+libass_libass_la_LDFLAGS += -export-symbols $(top_srcdir)/libass/libass.sym
+
+# Public headers: installed into $(includedir)/ass and included in the
+# distribution (dist_ prefix).
+assheadersdir = $(includedir)/ass
+dist_assheaders_HEADERS = libass/ass_types.h libass/ass.h
+
+# Additional files added to the distribution that are not listed as sources.
+# NOTE(review): EXTRA_DIST is appended to, so it is presumably initialised by
+# the Makefile.am that includes this fragment -- confirm.
+EXTRA_DIST += \
+ libass/x86/x86inc.asm libass/x86/utils.asm \
+ libass/libass.sym
diff --git a/libass/aarch64/asm.S b/libass/aarch64/asm.S
new file mode 100644
index 0000000..8b94ece
--- /dev/null
+++ b/libass/aarch64/asm.S
@@ -0,0 +1,280 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LIBASS_AARCH64_ASM_S
+#define LIBASS_AARCH64_ASM_S
+
+#include "config.h"
+
+/* Rewrite any use of x18/w18 into a name that is not a register, so that
+ * accidental use of this register in the assembly sources is caught.
+ * NOTE(review): presumably because x18 is reserved as a platform register
+ * on some targets -- confirm. */
+#define x18 do_not_use_x18
+#define w18 do_not_use_w18
+
+/* Support macros for
+ * - Armv8.3-A Pointer Authentication and
+ * - Armv8.5-A Branch Target Identification
+ * features which require emitting a .note.gnu.property section with the
+ * appropriate architecture-dependent feature bits set.
+ *
+ * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
+ * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
+ * used immediately before saving the LR register (x30) to the stack.
+ * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
+ * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
+ * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also
+ * have the same value at the two points. For example:
+ *
+ * .global f
+ * f:
+ * AARCH64_SIGN_LINK_REGISTER
+ * stp x29, x30, [sp, #-96]!
+ * mov x29, sp
+ * ...
+ * ldp x29, x30, [sp], #96
+ * AARCH64_VALIDATE_LINK_REGISTER
+ * ret
+ *
+ * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
+ * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
+ * indirect call target. In particular, all symbols exported from a file must
+ * begin with one of these macros. For example, a leaf function that does not
+ * save LR can instead use |AARCH64_VALID_CALL_TARGET|:
+ *
+ * .globl return_zero
+ * return_zero:
+ * AARCH64_VALID_CALL_TARGET
+ * mov x0, #0
+ * ret
+ *
+ * A non-leaf function which does not immediately save LR may need both macros
+ * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
+ * may jump to an alternate implementation before setting up the stack:
+ *
+ * .globl with_early_jump
+ * with_early_jump:
+ * AARCH64_VALID_CALL_TARGET
+ * cmp x0, #128
+ * b.lt .Lwith_early_jump_128
+ * AARCH64_SIGN_LINK_REGISTER
+ * stp x29, x30, [sp, #-96]!
+ * mov x29, sp
+ * ...
+ * ldp x29, x30, [sp], #96
+ * AARCH64_VALIDATE_LINK_REGISTER
+ * ret
+ *
+ * .Lwith_early_jump_128:
+ * ...
+ * ret
+ *
+ * These annotations are only required with indirect calls. Private symbols that
+ * are only the target of direct calls do not require annotations. Also note
+ * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
+ * indirect jumps (BR). Indirect jumps in assembly are supported through
+ * |AARCH64_VALID_JUMP_TARGET|. Landing Pads which shall serve for jumps and
+ * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|.
+ *
+ * Although not necessary, it is safe to use these macros in 32-bit ARM
+ * assembly. This may be used to simplify dual 32-bit and 64-bit files.
+ *
+ * References:
+ * - "ELF for the Arm® 64-bit Architecture"
+ * https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
+ * - "Providing protection for complex software"
+ * https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
+ */
+/* Branch Target Identification: when __ARM_FEATURE_BTI_DEFAULT is defined
+ * as 1, the landing-pad macros expand to the matching hint instructions and
+ * GNU_PROPERTY_AARCH64_BTI carries the BTI feature bit; otherwise the macros
+ * expand to nothing and the bit is 0. */
+#if defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT == 1)
+#define GNU_PROPERTY_AARCH64_BTI (1 << 0) // Has Branch Target Identification
+#define AARCH64_VALID_JUMP_CALL_TARGET hint #38 // BTI 'jc'
+#define AARCH64_VALID_CALL_TARGET hint #34 // BTI 'c'
+#define AARCH64_VALID_JUMP_TARGET hint #36 // BTI 'j'
+#else
+#define GNU_PROPERTY_AARCH64_BTI 0 // No Branch Target Identification
+#define AARCH64_VALID_JUMP_CALL_TARGET
+#define AARCH64_VALID_CALL_TARGET
+#define AARCH64_VALID_JUMP_TARGET
+#endif
+
+/* Pointer authentication of the link register.
+ *
+ * Selects the instructions behind AARCH64_SIGN_LINK_REGISTER and
+ * AARCH64_VALIDATE_LINK_REGISTER and the value of GNU_PROPERTY_AARCH64_PAC:
+ *  - __ARM_FEATURE_PAC_DEFAULT defined: bit 0 selects key A (paciasp/autiasp),
+ *    otherwise bit 1 selects key B (pacibsp/autibsp); neither bit set is an
+ *    error, and so is bit 2 (leaf-function authentication). The PAC feature
+ *    bit is set.
+ *  - Apple arm64e without that define: key B instructions are used, feature
+ *    bit left at 0.
+ *  - otherwise: both macros expand to nothing and the feature bit is 0.
+ */
+#if defined(__ARM_FEATURE_PAC_DEFAULT)
+
+#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 0)) != 0) // authentication using key A
+#define AARCH64_SIGN_LINK_REGISTER paciasp
+#define AARCH64_VALIDATE_LINK_REGISTER autiasp
+#elif ((__ARM_FEATURE_PAC_DEFAULT & (1 << 1)) != 0) // authentication using key B
+#define AARCH64_SIGN_LINK_REGISTER pacibsp
+#define AARCH64_VALIDATE_LINK_REGISTER autibsp
+#else
+#error Pointer authentication defines no valid key!
+#endif
+#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 2)) != 0) // authentication of leaf functions
+#error Authentication of leaf functions is enabled but not supported in libass!
+#endif
+#define GNU_PROPERTY_AARCH64_PAC (1 << 1)
+
+#elif defined(__APPLE__) && defined(__arm64e__)
+
+#define GNU_PROPERTY_AARCH64_PAC 0
+#define AARCH64_SIGN_LINK_REGISTER pacibsp
+#define AARCH64_VALIDATE_LINK_REGISTER autibsp
+
+#else /* __ARM_FEATURE_PAC_DEFAULT */
+
+#define GNU_PROPERTY_AARCH64_PAC 0
+#define AARCH64_SIGN_LINK_REGISTER
+#define AARCH64_VALIDATE_LINK_REGISTER
+
+#endif /* !__ARM_FEATURE_PAC_DEFAULT */
+
+
+/* On ELF targets with BTI or PAC enabled, emit a .note.gnu.property note
+ * advertising the enabled features. The words below follow the ELF note
+ * layout: name size (4, matching the NUL-terminated "GNU"), descriptor size
+ * (0x10, the four words after the name), note type (0x5), the name, then one
+ * property: its type, its data size (4), the feature bits and a final zero
+ * word.
+ * NOTE(review): 0x5 is presumably NT_GNU_PROPERTY_TYPE_0 and the final word
+ * alignment padding -- confirm against the ABI document. */
+#if (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__)
+ .pushsection .note.gnu.property, "a"
+ .balign 8
+ .long 4
+ .long 0x10
+ .long 0x5
+ .asciz "GNU"
+ .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+ .long 4
+ .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC)
+ .long 0
+ .popsection
+#endif /* (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__) */
+
+/* If PIC was not defined by the build, derive it from the compiler's
+ * __PIC__ / __pic__ predefines (it stays undefined when neither exists). */
+#if !defined(PIC)
+#if defined(__PIC__)
+#define PIC __PIC__
+#elif defined(__pic__)
+#define PIC __pic__
+#endif
+#endif
+
+/* Default symbol prefix for exported assembly symbols. */
+#ifndef PRIVATE_PREFIX
+#define PRIVATE_PREFIX ass_
+#endif
+
+/* Two-level token pasting: CONCAT expands its arguments before PASTE joins
+ * them, so macros such as PRIVATE_PREFIX are pasted by value. */
+#define PASTE(a,b) a ## b
+#define CONCAT(a,b) PASTE(a,b)
+
+/* EXTERN is the prefix put in front of exported symbol names: the private
+ * prefix, with an extra leading underscore when PREFIX is defined. */
+#ifdef PREFIX
+#define EXTERN CONCAT(_,PRIVATE_PREFIX)
+#else
+#define EXTERN PRIVATE_PREFIX
+#endif
+
+/*
+ * function name, export=0, align=2
+ *
+ * Opens a function in .text, applies ".align" with the align argument and
+ * defines the label given by name. With export=1 it additionally defines the
+ * global symbol EXTERN+name (hidden on ELF, private_extern on Mach-O) and
+ * starts the body with AARCH64_VALID_CALL_TARGET. The nested "endfunc" macro
+ * closes the function: it emits .size on ELF and .endfunc when HAVE_AS_FUNC
+ * is set, then removes itself with .purgem so that the next use of
+ * "function" can define it again.
+ */
+.macro function name, export=0, align=2
+ .macro endfunc
+#ifdef __ELF__
+ .size \name, . - \name
+#endif
+#if HAVE_AS_FUNC
+ .endfunc
+#endif
+ .purgem endfunc
+ .endm
+ .text
+ .align \align
+ .if \export
+ .global EXTERN\name
+#ifdef __ELF__
+ .type EXTERN\name, %function
+ .hidden EXTERN\name
+#elif defined(__MACH__)
+ .private_extern EXTERN\name
+#endif
+#if HAVE_AS_FUNC
+ .func EXTERN\name
+#endif
+EXTERN\name:
+ .else
+#ifdef __ELF__
+ .type \name, %function
+#endif
+#if HAVE_AS_FUNC
+ .func \name
+#endif
+ .endif
+\name:
+ .if \export
+ AARCH64_VALID_CALL_TARGET
+ .endif
+.endm
+
+/*
+ * const name, export=0, align=2
+ *
+ * Opens a constant data object: selects .rdata on Windows, .const_data on
+ * Mach-O and .rodata elsewhere, applies ".align" with the align argument and
+ * defines the label given by name. With export=1 the global symbol
+ * EXTERN+name is defined as well (hidden on ELF, private_extern on Mach-O).
+ * The nested "endconst" macro emits .size on ELF and then purges itself so
+ * that the next use of "const" can define it again.
+ */
+.macro const name, export=0, align=2
+ .macro endconst
+#ifdef __ELF__
+ .size \name, . - \name
+#endif
+ .purgem endconst
+ .endm
+#if defined(_WIN32)
+ .section .rdata
+#elif !defined(__MACH__)
+ .section .rodata
+#else
+ .const_data
+#endif
+ .align \align
+ .if \export
+ .global EXTERN\name
+#ifdef __ELF__
+ .hidden EXTERN\name
+#elif defined(__MACH__)
+ .private_extern EXTERN\name
+#endif
+EXTERN\name:
+ .endif
+\name:
+.endm
+
+/*
+ * movrel rd, val, offset=0
+ *
+ * Loads the address val+offset into register rd.
+ *  - Apple: adrp/add pair using @PAGE / @PAGEOFF.
+ *  - PIC on Windows: adrp/add pair using :lo12:.
+ *    In both of these cases a negative offset is applied afterwards with a
+ *    separate sub of its magnitude instead of being folded into the symbol
+ *    expression. NOTE(review): presumably because negative addends are not
+ *    usable in those relocations -- confirm.
+ *  - other PIC builds: adrp/add pair using :lo12: with the offset folded in.
+ *  - non-PIC builds: a single "ldr rd, =expr" pseudo-instruction.
+ */
+.macro movrel rd, val, offset=0
+#if defined(__APPLE__)
+ .if \offset < 0
+ adrp \rd, \val@PAGE
+ add \rd, \rd, \val@PAGEOFF
+ sub \rd, \rd, -(\offset)
+ .else
+ adrp \rd, \val+(\offset)@PAGE
+ add \rd, \rd, \val+(\offset)@PAGEOFF
+ .endif
+#elif defined(PIC) && defined(_WIN32)
+ .if \offset < 0
+ adrp \rd, \val
+ add \rd, \rd, :lo12:\val
+ sub \rd, \rd, -(\offset)
+ .else
+ adrp \rd, \val+(\offset)
+ add \rd, \rd, :lo12:\val+(\offset)
+ .endif
+#elif defined(PIC)
+ adrp \rd, \val+(\offset)
+ add \rd, \rd, :lo12:\val+(\offset)
+#else
+ ldr \rd, =\val+\offset
+#endif
+.endm
+
+
+#endif /* LIBASS_AARCH64_ASM_S */
diff --git a/libass/aarch64/be_blur.S b/libass/aarch64/be_blur.S
new file mode 100644
index 0000000..847df63
--- /dev/null
+++ b/libass/aarch64/be_blur.S
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2021 rcombs
+ *
+ * This file is part of libass.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "asm.S"
+
+/*
+ * void be_blur(uint8_t *buf, intptr_t stride,
+ * intptr_t width, intptr_t height, uint16_t *tmp);
+ */
+
+/*
+ * be_blur_neon: NEON implementation of be_blur (arguments per the prototype
+ * comment: x0 = buf, x1 = stride, x2 = width, x3 = height, x4 = tmp).
+ * Each inner-loop iteration handles 16 pixels (the width counter is
+ * decremented by 16 per iteration).
+ * Register use, as set up at function entry:
+ *   x0  = read pointer into buf, x6 = write pointer into buf
+ *   x1  = (stride - width) & ~15, added to the buf pointers after each row
+ *   x7  = saved tmp base, x4 = running tmp pointer
+ *   x9  = saved width, x2 = running width counter
+ *   x3  = remaining row counter
+ *   v16 = all-zero vector shifted in at the left and right row edges
+ */
+function be_blur_neon, export=1
+ sub x1, x1, x2
+ and x1, x1, ~15
+ mov x6, x0
+ mov x7, x4
+ movi v16.16b, 0
+ mov x9, x2
+
+ /* First row: horizontal sums only; each result vector is stored to tmp
+  * twice and nothing is written back to buf yet. */
+ ld1 {v3.16b}, [x0], #16
+ ushll v4.8h, v3.8b, 0
+
+ ext v5.16b, v16.16b, v4.16b, 14
+ add v5.8h, v5.8h, v4.8h
+
+ ushll2 v0.8h, v3.16b, 0
+ b 1f
+
+0:
+ ld1 {v3.16b}, [x0], #16
+ ushll v4.8h, v3.8b, 0
+ ext v5.16b, v0.16b, v4.16b, 14
+ add v5.8h, v5.8h, v4.8h
+ ushll2 v0.8h, v3.16b, 0
+ ext v3.16b, v1.16b, v5.16b, 2
+ add v3.8h, v3.8h, v1.8h
+ mov v2.16b, v3.16b
+
+ st1 {v2.8h, v3.8h}, [x4], #32
+
+1:
+ ext v1.16b, v4.16b, v0.16b, 14
+ add v1.8h, v1.8h, v0.8h
+ ext v3.16b, v5.16b, v1.16b, 2
+ add v3.8h, v3.8h, v5.8h
+
+ mov v4.16b, v3.16b
+ st1 {v3.8h, v4.8h}, [x4], #32
+
+ subs x2, x2, 16
+ b.hi 0b
+
+ /* Right edge of the first row: zeros from v16 are shifted in. */
+ ext v0.16b, v0.16b, v16.16b, 14
+ ext v3.16b, v1.16b, v0.16b, 2
+ add v3.8h, v3.8h, v1.8h
+
+ mov v4.16b, v3.16b
+ st1 {v3.8h, v4.8h}, [x4], #32
+
+ /* Skip to the next input row; with no rows left, go straight to the
+  * final flush at label 3. */
+ add x0, x0, x1
+ subs x3, x3, 1
+ b.le 3f
+
+ /* Remaining rows: rewind the tmp pointer and width counter, compute the
+  * horizontal sums, combine them with the pair of vectors held in tmp,
+  * store the updated pair back to tmp, and write the sums shifted right by
+  * 4 (saturating narrow) to buf through x6. */
+0:
+ mov x4, x7
+ mov x2, x9
+ ld1 {v2.16b}, [x0], #16
+ ushll v4.8h, v2.8b, 0
+ ext v5.16b, v16.16b, v4.16b, 14
+ add v5.8h, v5.8h, v4.8h
+ ushll2 v0.8h, v2.16b, 0
+
+ b 2f
+
+1:
+ ld1 {v2.16b}, [x0], #16
+ ushll v4.8h, v2.8b, 0
+ ext v5.16b, v0.16b, v4.16b, 14
+ add v5.8h, v5.8h, v4.8h
+ ushll2 v0.8h, v2.16b, 0
+ ext v2.16b, v1.16b, v5.16b, 2
+ add v6.8h, v2.8h, v1.8h
+
+ ld1 {v1.8h, v2.8h}, [x4]
+ add v7.8h, v1.8h, v6.8h
+ st1 {v6.8h, v7.8h}, [x4], #32
+ add v2.8h, v2.8h, v7.8h
+ uqshrn2 v3.16b, v2.8h, 4
+
+ st1 {v3.16b}, [x6], #16
+
+2:
+ ext v1.16b, v4.16b, v0.16b, 14
+ add v1.8h, v1.8h, v0.8h
+ ext v2.16b, v5.16b, v1.16b, 2
+ add v2.8h, v2.8h, v5.8h
+
+ ld1 {v3.8h, v4.8h}, [x4]
+ add v3.8h, v3.8h, v2.8h
+ st1 {v2.8h, v3.8h}, [x4], #32
+ add v4.8h, v4.8h, v3.8h
+ uqshrn v3.8b, v4.8h, 4
+
+ subs x2, x2, 16
+ b.hi 1b
+
+ /* Right edge of the current row: zeros from v16 are shifted in. */
+ ext v0.16b, v0.16b, v16.16b, 14
+ ext v2.16b, v1.16b, v0.16b, 2
+ add v4.8h, v2.8h, v1.8h
+
+ ld1 {v0.8h, v1.8h}, [x4]
+ add v5.8h, v0.8h, v4.8h
+ st1 {v4.8h, v5.8h}, [x4], #32
+ add v1.8h, v1.8h, v5.8h
+ uqshrn2 v3.16b, v1.8h, 4
+ st1 {v3.16b}, [x6], #16
+
+ add x0, x0, x1
+ add x6, x6, x1
+ subs x3, x3, 1
+ b.hi 0b
+
+ /* Final flush: emit one more output row from the contents of tmp alone,
+  * adding each pair of stored vectors and narrowing with a shift by 4. */
+3:
+ mov x2, x9
+ mov x4, x7
+0:
+ ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x4], #64
+ add v2.8h, v2.8h, v3.8h
+ uqshrn v2.8b, v2.8h, 4
+ add v3.8h, v4.8h, v5.8h
+ uqshrn2 v2.16b, v3.8h, 4
+ st1 {v2.16b}, [x6], #16
+ subs x2, x2, 16
+ b.hi 0b
+ ret
+endfunc
diff --git a/libass/aarch64/blend_bitmaps.S b/libass/aarch64/blend_bitmaps.S
new file mode 100644
index 0000000..2e8f053
--- /dev/null
+++ b/libass/aarch64/blend_bitmaps.S
@@ -0,0 +1,162 @@