summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRodger Combs <rodger.combs@gmail.com>2016-01-31 08:07:35 -0600
committerRodger Combs <rodger.combs@gmail.com>2016-07-15 06:01:51 -0500
commit7a7b66ab341e8c9faa9cce007c53a502df5b5bb1 (patch)
tree74174bbea96990ff3fad7fe1393fb1d6f2724840
parent2f4012036f9ab552deacf27af2df5cbdc3b4a067 (diff)
downloadlibass-7a7b66ab341e8c9faa9cce007c53a502df5b5bb1.tar.bz2
libass-7a7b66ab341e8c9faa9cce007c53a502df5b5bb1.tar.xz
Initial messy ARM ASM work
-rw-r--r--configure.ac30
-rw-r--r--libass/Makefile.am5
-rw-r--r--libass/arm/asm.S11
-rw-r--r--libass/arm/blend_bitmaps.S67
-rw-r--r--libass/ass_bitmap.c12
-rw-r--r--libass/ass_bitmap.h1
-rw-r--r--libass/ass_func_template.h20
-rw-r--r--libass/ass_render.c2
8 files changed, 129 insertions, 19 deletions
diff --git a/configure.ac b/configure.ac
index 5c61f1a..084e484 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,21 +74,24 @@ AS_IF([test x$enable_asm != xno], [
X64=true
BITS=64
ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -m amd64" ],
+ [armv7*], [
+ ARM=true
+ BITS=32 ],
)
- AS_CASE([$host],
- [*darwin*], [
- ASFLAGS="$ASFLAGS -f macho$BITS -DPREFIX -DHAVE_ALIGNED_STACK=1" ],
- [*linux*|*dragonfly*|*bsd*|*solaris*], [
- ASFLAGS="$ASFLAGS -f elf -DHAVE_ALIGNED_STACK=1" ],
- [*cygwin*|*mingw*], [
- ASFLAGS="$ASFLAGS -f win$BITS"
- AS_IF([test x$BITS = x64], [
- ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=1"
- ], [
- ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=0 -DPREFIX"
- ])
- ])
AS_IF([test x$INTEL = xtrue], [
+ AS_CASE([$host],
+ [*darwin*], [
+ ASFLAGS="$ASFLAGS -f macho$BITS -DPREFIX -DHAVE_ALIGNED_STACK=1" ],
+ [*linux*|*dragonfly*|*bsd*|*solaris*], [
+ ASFLAGS="$ASFLAGS -f elf -DHAVE_ALIGNED_STACK=1" ],
+ [*cygwin*|*mingw*], [
+ ASFLAGS="$ASFLAGS -f win$BITS"
+ AS_IF([test x$BITS = x64], [
+ ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=1"
+ ], [
+ ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=0 -DPREFIX"
+ ])
+ ])
AC_CHECK_PROG([yasm_check], [$AS], [yes])
AS_IF([test x$yasm_check != xyes], [
AC_MSG_WARN(yasm was not found; ASM functions are disabled.)
@@ -116,6 +119,7 @@ AC_SUBST([AS], ["$AS"])
AM_CONDITIONAL([ASM], [test x$enable_asm != xno])
AM_CONDITIONAL([INTEL], [test x$INTEL = xtrue])
+AM_CONDITIONAL([ARM], [test x$ARM = xtrue])
AM_CONDITIONAL([X86], [test x$X86 = xtrue])
AM_CONDITIONAL([X64], [test x$X64 = xtrue])
diff --git a/libass/Makefile.am b/libass/Makefile.am
index f2ecb74..1dece0d 100644
--- a/libass/Makefile.am
+++ b/libass/Makefile.am
@@ -18,6 +18,8 @@ SRC_INTEL = x86/blend_bitmaps.asm x86/blur.asm x86/cpuid.asm x86/utils.asm \
SRC_INTEL64 = x86/be_blur.asm
SRC_INTEL_RASTERIZER = x86/rasterizer.asm
+SRC_ARM = arm/blend_bitmaps.S
+
SRC_RASTERIZER = ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c
SRC_DIRECTWRITE = ass_directwrite.c ass_directwrite.h dwrite_c.h
@@ -58,6 +60,9 @@ if X64
libass_la_SOURCES += $(SRC_INTEL64)
endif
endif
+if ARM
+libass_la_SOURCES += $(SRC_ARM)
+endif
endif
assheadersdir = $(includedir)/ass
diff --git a/libass/arm/asm.S b/libass/arm/asm.S
new file mode 100644
index 0000000..4b98608
--- /dev/null
+++ b/libass/arm/asm.S
@@ -0,0 +1,11 @@
+.macro function name @ begin an exported function whose symbol is the given name prefixed with ass_
+ .macro endfunc @ defined afresh by every use of function, so it closes that specific symbol
+ .size ass_\name, . - ass_\name @ symbol size = current location minus the entry label
+ .endfunc @ closes the .func debug scope opened below
+ .purgem endfunc @ remove this macro so the next function can define it again
+ .endm
+ .global ass_\name @ make the symbol visible to the linker
+ .type ass_\name, %function @ mark the symbol as a function
+ .func ass_\name @ open the debug scope that endfunc closes
+ass_\name: @ entry label; the function body follows the macro invocation
+.endm
diff --git a/libass/arm/blend_bitmaps.S b/libass/arm/blend_bitmaps.S
new file mode 100644
index 0000000..c72d923
--- /dev/null
+++ b/libass/arm/blend_bitmaps.S
@@ -0,0 +1,67 @@
+/*
+ * void add_bitmaps( uint8_t *dst, intptr_t dst_stride,
+ * uint8_t *src, intptr_t src_stride,
+ * intptr_t width, intptr_t height );
+*/
+
+#include "asm.S"
+
+.macro blend_func type @ emits a dst = dst <type> src routine; type selects the uqadd8 or uqsub8 instruction
+function \type\()_bitmaps_arm
+ push {r4-r7} @ save the four scratch registers used below (16 bytes)
+ ldrd r4, r5, [sp, #16] @ two words just above the 16 pushed bytes: r4 = width, r5 = height per the prototype comment
+@.Lskip_prologue:
+ mla r5, r5, r3, r2 @ r5 = height * src_stride + src, the src address at which the row loop stops
+0: @ height loop
+ mov r6, #0 @ x offset
+1: @ stride loop
+ ldr r7, [r0, r6] @ four bytes from dst
+ ldr ip, [r2, r6] @ four bytes from src
+ uq\type\()8 ip, r7, ip @ per-byte unsigned saturating op, dst operand first
+ str ip, [r0, r6] @ write the four result bytes back to dst
+ add r6, #4 @ advances a whole word at a time; NOTE(review): seems to rely on rows padded to a multiple of 4 - confirm
+ cmp r6, r4
+ blo 1b @ still in scan line
+ add r0, r1 @ next dst row
+ add r2, r3 @ next src row
+ cmp r2, r5
+ blo 0b @ repeat until src reaches the end address; the body always runs at least once
+ pop {r4-r7}
+ bx lr
+endfunc
+.endm
+
+blend_func add @ defines ass_add_bitmaps_arm
+blend_func sub @ defines ass_sub_bitmaps_arm
+
+/*
+ * void mul_bitmaps( uint8_t *dst, intptr_t dst_stride,
+ * uint8_t *src1, intptr_t src1_stride,
+ * uint8_t *src2, intptr_t src2_stride,
+ * intptr_t width, intptr_t height );
+*/
+function mul_bitmaps_arm
+ push {r4-r11} @ save callee-saved registers before any of them is written (32 bytes)
+ mov r11, #255 @ rounding bias: dst = (src1 * src2 + 255) >> 8
+ ldrd r4, r5, [sp, #32] @ stack args above the 32 pushed bytes: r4 = src2, r5 = src2_stride
+ ldrd r6, r7, [sp, #40] @ r6 = width, r7 = height
+ mla r7, r7, r3, r2 @ last address
+0: @ height loop
+ mov r8, #0 @ x offset
+1: @ stride loop
+ ldrb r9, [r2, r8] @ one byte from src1
+ ldrb r10, [r4, r8] @ one byte from src2
+ mla r9, r9, r10, r11 @ r9 = src1 * src2 + 255
+ asr r9, r9, #8 @ product is at most 65280, so the arithmetic shift never sees a sign bit
+ strb r9, [r0, r8]
+ add r8, r8, #1
+ cmp r8, r6
+ blo 1b @ still in scan line
+ add r0, r1 @ next dst row
+ add r2, r3 @ next src1 row
+ add r4, r5 @ next src2 row
+ cmp r2, r7
+ blo 0b @ repeat until src1 reaches the last address
+ pop {r4-r11}
+ bx lr
+endfunc
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index 73620dd..083b0e2 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -41,7 +41,8 @@
#undef ALIGN
#undef DECORATE
-#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+#if CONFIG_ASM
+#if (defined(__i386__) || defined(__x86_64__))
#define ALIGN 4
#define DECORATE(func) ass_##func##_sse2
@@ -54,7 +55,16 @@
#include "ass_func_template.h"
#undef ALIGN
#undef DECORATE
+#elif defined(__arm__)
+#define ALIGN 5
+#define DECORATE(func) ass_##func##_arm // mostly not ported yet
+#define DECORATE2(func) ass_##func##_arm // mostly not ported yet
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE
+
+#endif
#endif
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index e4c1451..84a2bc8 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -96,6 +96,7 @@ typedef struct {
extern const BitmapEngine ass_bitmap_engine_c;
extern const BitmapEngine ass_bitmap_engine_sse2;
extern const BitmapEngine ass_bitmap_engine_avx2;
+extern const BitmapEngine ass_bitmap_engine_arm;
typedef struct {
diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h
index 0eccdfb..6f4755e 100644
--- a/libass/ass_func_template.h
+++ b/libass/ass_func_template.h
@@ -93,10 +93,18 @@ void DECORATE(blur1246_vert)(int16_t *dst, const int16_t *src,
uintptr_t src_width, uintptr_t src_height,
const int16_t *param);
-
const BitmapEngine DECORATE(bitmap_engine) = {
.align_order = ALIGN,
+#ifdef __arm__
+#undef DECORATE
+#define DECORATE(func) ass_##func##_c
+#endif
+
+#ifndef DECORATE2
+#define DECORATE2(x) DECORATE(x)
+#endif
+
#if CONFIG_RASTERIZER
#if CONFIG_LARGE_TILES
.tile_order = 5,
@@ -111,10 +119,10 @@ const BitmapEngine DECORATE(bitmap_engine) = {
#endif
#endif
- .add_bitmaps = DECORATE(add_bitmaps),
-#ifdef __x86_64__
- .sub_bitmaps = DECORATE(sub_bitmaps),
- .mul_bitmaps = DECORATE(mul_bitmaps),
+ .add_bitmaps = DECORATE2(add_bitmaps),
+#if defined(__x86_64__) || defined(__arm__)
+ .sub_bitmaps = DECORATE2(sub_bitmaps),
+ .mul_bitmaps = DECORATE2(mul_bitmaps),
#else
.sub_bitmaps = ass_sub_bitmaps_c,
.mul_bitmaps = ass_mul_bitmaps_c,
@@ -139,3 +147,5 @@ const BitmapEngine DECORATE(bitmap_engine) = {
.main_blur_horz = { DECORATE(blur1234_horz), DECORATE(blur1235_horz), DECORATE(blur1246_horz) },
.main_blur_vert = { DECORATE(blur1234_vert), DECORATE(blur1235_vert), DECORATE(blur1246_vert) },
};
+
+#undef DECORATE2
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 77c7e30..42dbf6e 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -70,6 +70,8 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library)
priv->engine = &ass_bitmap_engine_sse2;
else
priv->engine = &ass_bitmap_engine_c;
+#elif defined(__arm__) && CONFIG_ASM
+ priv->engine = &ass_bitmap_engine_arm;
#else
priv->engine = &ass_bitmap_engine_c;
#endif