diff options
author | Rodger Combs <rodger.combs@gmail.com> | 2016-01-31 08:07:35 -0600 |
---|---|---|
committer | Rodger Combs <rodger.combs@gmail.com> | 2016-07-15 06:01:51 -0500 |
commit | 7a7b66ab341e8c9faa9cce007c53a502df5b5bb1 (patch) | |
tree | 74174bbea96990ff3fad7fe1393fb1d6f2724840 | |
parent | 2f4012036f9ab552deacf27af2df5cbdc3b4a067 (diff) | |
download | libass-7a7b66ab341e8c9faa9cce007c53a502df5b5bb1.tar.bz2 libass-7a7b66ab341e8c9faa9cce007c53a502df5b5bb1.tar.xz |
Initial messy ARM ASM work
-rw-r--r-- | configure.ac | 30 | ||||
-rw-r--r-- | libass/Makefile.am | 5 | ||||
-rw-r--r-- | libass/arm/asm.S | 11 | ||||
-rw-r--r-- | libass/arm/blend_bitmaps.S | 67 | ||||
-rw-r--r-- | libass/ass_bitmap.c | 12 | ||||
-rw-r--r-- | libass/ass_bitmap.h | 1 | ||||
-rw-r--r-- | libass/ass_func_template.h | 20 | ||||
-rw-r--r-- | libass/ass_render.c | 2 |
8 files changed, 129 insertions, 19 deletions
diff --git a/configure.ac b/configure.ac index 5c61f1a..084e484 100644 --- a/configure.ac +++ b/configure.ac @@ -74,21 +74,24 @@ AS_IF([test x$enable_asm != xno], [ X64=true BITS=64 ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -m amd64" ], + [armv7*], [ + ARM=true + BITS=32 ], ) - AS_CASE([$host], - [*darwin*], [ - ASFLAGS="$ASFLAGS -f macho$BITS -DPREFIX -DHAVE_ALIGNED_STACK=1" ], - [*linux*|*dragonfly*|*bsd*|*solaris*], [ - ASFLAGS="$ASFLAGS -f elf -DHAVE_ALIGNED_STACK=1" ], - [*cygwin*|*mingw*], [ - ASFLAGS="$ASFLAGS -f win$BITS" - AS_IF([test x$BITS = x64], [ - ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=1" - ], [ - ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=0 -DPREFIX" - ]) - ]) AS_IF([test x$INTEL = xtrue], [ + AS_CASE([$host], + [*darwin*], [ + ASFLAGS="$ASFLAGS -f macho$BITS -DPREFIX -DHAVE_ALIGNED_STACK=1" ], + [*linux*|*dragonfly*|*bsd*|*solaris*], [ + ASFLAGS="$ASFLAGS -f elf -DHAVE_ALIGNED_STACK=1" ], + [*cygwin*|*mingw*], [ + ASFLAGS="$ASFLAGS -f win$BITS" + AS_IF([test x$BITS = x64], [ + ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=1" + ], [ + ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=0 -DPREFIX" + ]) + ]) AC_CHECK_PROG([yasm_check], [$AS], [yes]) AS_IF([test x$yasm_check != xyes], [ AC_MSG_WARN(yasm was not found; ASM functions are disabled.) 
@@ -116,6 +119,7 @@ AC_SUBST([AS], ["$AS"]) AM_CONDITIONAL([ASM], [test x$enable_asm != xno]) AM_CONDITIONAL([INTEL], [test x$INTEL = xtrue]) +AM_CONDITIONAL([ARM], [test x$ARM = xtrue]) AM_CONDITIONAL([X86], [test x$X86 = xtrue]) AM_CONDITIONAL([X64], [test x$X64 = xtrue]) diff --git a/libass/Makefile.am b/libass/Makefile.am index f2ecb74..1dece0d 100644 --- a/libass/Makefile.am +++ b/libass/Makefile.am @@ -18,6 +18,8 @@ SRC_INTEL = x86/blend_bitmaps.asm x86/blur.asm x86/cpuid.asm x86/utils.asm \ SRC_INTEL64 = x86/be_blur.asm SRC_INTEL_RASTERIZER = x86/rasterizer.asm +SRC_ARM = arm/blend_bitmaps.S + SRC_RASTERIZER = ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c SRC_DIRECTWRITE = ass_directwrite.c ass_directwrite.h dwrite_c.h @@ -58,6 +60,9 @@ if X64 libass_la_SOURCES += $(SRC_INTEL64) endif endif +if ARM +libass_la_SOURCES += $(SRC_ARM) +endif endif assheadersdir = $(includedir)/ass diff --git a/libass/arm/asm.S b/libass/arm/asm.S new file mode 100644 index 0000000..4b98608 --- /dev/null +++ b/libass/arm/asm.S @@ -0,0 +1,11 @@ +.macro function name + .macro endfunc + .size ass_\name, . 
- ass_\name + .endfunc + .purgem endfunc + .endm + .global ass_\name + .type ass_\name, %function + .func ass_\name +ass_\name: +.endm diff --git a/libass/arm/blend_bitmaps.S b/libass/arm/blend_bitmaps.S new file mode 100644 index 0000000..c72d923 --- /dev/null +++ b/libass/arm/blend_bitmaps.S @@ -0,0 +1,67 @@ +/* + * void add_bitmaps( uint8_t *dst, intptr_t dst_stride, + * uint8_t *src, intptr_t src_stride, + * intptr_t width, intptr_t height ); +*/ + +#include "asm.S" + +.macro blend_func type +function \type\()_bitmaps_arm + push {r4-r7} + ldrd r4, r5, [sp, #16] +@.Lskip_prologue: + mla r5, r5, r3, r2 +0: @ height loop + mov r6, #0 @ x offset +1: @ stride loop + ldr r7, [r0, r6] + ldr ip, [r2, r6] + uq\type\()8 ip, r7, ip + str ip, [r0, r6] + add r6, #4 + cmp r6, r4 + blo 1b @ still in scan line + add r0, r1 + add r2, r3 + cmp r2, r5 + blo 0b + pop {r4-r7} + bx lr +endfunc +.endm + +blend_func add +blend_func sub + +/* + * void mul_bitmaps( uint8_t *dst, intptr_t dst_stride, + * uint8_t *src1, intptr_t src1_stride, + * uint8_t *src2, intptr_t src2_stride, + * intptr_t width, intptr_t height ); +*/ +function mul_bitmaps_arm + push {r4-r11} @ save callee-saved regs before r11 is overwritten + mov r11, #255 @ constant added to each product by the mla below + ldrd r4, r5, [sp, #32] + ldrd r6, r7, [sp, #40] + mla r7, r7, r3, r2 @ last address +0: @ height loop + mov r8, #0 @ x offset +1: @ stride loop + ldrb r9, [r2, r8] + ldrb r10, [r4, r8] + mla r9, r9, r10, r11 + asr r9, r9, #8 + strb r9, [r0, r8] + add r8, r8, #1 + cmp r8, r6 + blo 1b @ still in scan line + add r0, r1 + add r2, r3 + add r4, r5 + cmp r2, r7 + blo 0b + pop {r4-r11} + bx lr +endfunc diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c index 73620dd..083b0e2 100644 --- a/libass/ass_bitmap.c +++ b/libass/ass_bitmap.c @@ -41,7 +41,8 @@ #undef ALIGN #undef DECORATE -#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM +#if CONFIG_ASM +#if (defined(__i386__) || defined(__x86_64__)) #define ALIGN 4 #define DECORATE(func) ass_##func##_sse2 @@ -54,7 +55,16 @@ #include "ass_func_template.h" #undef
ALIGN #undef DECORATE +#elif defined(__arm__) +#define ALIGN 5 +#define DECORATE(func) ass_##func##_arm // mostly not ported yet +#define DECORATE2(func) ass_##func##_arm // mostly not ported yet +#include "ass_func_template.h" +#undef ALIGN +#undef DECORATE + +#endif #endif diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h index e4c1451..84a2bc8 100644 --- a/libass/ass_bitmap.h +++ b/libass/ass_bitmap.h @@ -96,6 +96,7 @@ typedef struct { extern const BitmapEngine ass_bitmap_engine_c; extern const BitmapEngine ass_bitmap_engine_sse2; extern const BitmapEngine ass_bitmap_engine_avx2; +extern const BitmapEngine ass_bitmap_engine_arm; typedef struct { diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h index 0eccdfb..6f4755e 100644 --- a/libass/ass_func_template.h +++ b/libass/ass_func_template.h @@ -93,10 +93,18 @@ void DECORATE(blur1246_vert)(int16_t *dst, const int16_t *src, uintptr_t src_width, uintptr_t src_height, const int16_t *param); - const BitmapEngine DECORATE(bitmap_engine) = { .align_order = ALIGN, +#ifdef __arm__ +#undef DECORATE +#define DECORATE(func) ass_##func##_c +#endif + +#ifndef DECORATE2 +#define DECORATE2(x) DECORATE(x) +#endif + #if CONFIG_RASTERIZER #if CONFIG_LARGE_TILES .tile_order = 5, @@ -111,10 +119,10 @@ const BitmapEngine DECORATE(bitmap_engine) = { #endif #endif - .add_bitmaps = DECORATE(add_bitmaps), -#ifdef __x86_64__ - .sub_bitmaps = DECORATE(sub_bitmaps), - .mul_bitmaps = DECORATE(mul_bitmaps), + .add_bitmaps = DECORATE2(add_bitmaps), +#if defined(__x86_64__) || defined(__arm__) + .sub_bitmaps = DECORATE2(sub_bitmaps), + .mul_bitmaps = DECORATE2(mul_bitmaps), #else .sub_bitmaps = ass_sub_bitmaps_c, .mul_bitmaps = ass_mul_bitmaps_c, @@ -139,3 +147,5 @@ const BitmapEngine DECORATE(bitmap_engine) = { .main_blur_horz = { DECORATE(blur1234_horz), DECORATE(blur1235_horz), DECORATE(blur1246_horz) }, .main_blur_vert = { DECORATE(blur1234_vert), DECORATE(blur1235_vert), DECORATE(blur1246_vert) }, }; + +#undef 
DECORATE2 diff --git a/libass/ass_render.c b/libass/ass_render.c index 77c7e30..42dbf6e 100644 --- a/libass/ass_render.c +++ b/libass/ass_render.c @@ -70,6 +70,8 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library) priv->engine = &ass_bitmap_engine_sse2; else priv->engine = &ass_bitmap_engine_c; +#elif defined(__arm__) && CONFIG_ASM + priv->engine = &ass_bitmap_engine_arm; #else priv->engine = &ass_bitmap_engine_c; #endif |