From 7a7b66ab341e8c9faa9cce007c53a502df5b5bb1 Mon Sep 17 00:00:00 2001
From: Rodger Combs
Date: Sun, 31 Jan 2016 08:07:35 -0600
Subject: Initial messy ARM ASM work

---
 libass/Makefile.am         |  5 ++++
 libass/arm/asm.S           | 11 ++++++++
 libass/arm/blend_bitmaps.S | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 libass/ass_bitmap.c        | 12 ++++++++-
 libass/ass_bitmap.h        |  1 +
 libass/ass_func_template.h | 20 ++++++++++----
 libass/ass_render.c        |  2 ++
 7 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 libass/arm/asm.S
 create mode 100644 libass/arm/blend_bitmaps.S

(limited to 'libass')

diff --git a/libass/Makefile.am b/libass/Makefile.am
index f2ecb745..1dece0d1 100644
--- a/libass/Makefile.am
+++ b/libass/Makefile.am
@@ -18,6 +18,8 @@ SRC_INTEL = x86/blend_bitmaps.asm x86/blur.asm x86/cpuid.asm x86/utils.asm \
 SRC_INTEL64 = x86/be_blur.asm
 SRC_INTEL_RASTERIZER = x86/rasterizer.asm

+SRC_ARM = arm/blend_bitmaps.S
+
 SRC_RASTERIZER = ass_rasterizer.h ass_rasterizer.c ass_rasterizer_c.c

 SRC_DIRECTWRITE = ass_directwrite.c ass_directwrite.h dwrite_c.h
@@ -58,6 +60,9 @@ if X64
 libass_la_SOURCES += $(SRC_INTEL64)
 endif
 endif
+if ARM
+libass_la_SOURCES += $(SRC_ARM)
+endif
 endif

 assheadersdir = $(includedir)/ass
diff --git a/libass/arm/asm.S b/libass/arm/asm.S
new file mode 100644
index 00000000..4b98608b
--- /dev/null
+++ b/libass/arm/asm.S
@@ -0,0 +1,11 @@
+.macro function name
+    .macro endfunc
+        .size ass_\name, . - ass_\name
+        .endfunc
+        .purgem endfunc
+    .endm
+    .global ass_\name
+    .type ass_\name, %function
+    .func ass_\name
+ass_\name:
+.endm
diff --git a/libass/arm/blend_bitmaps.S b/libass/arm/blend_bitmaps.S
new file mode 100644
index 00000000..c72d9239
--- /dev/null
+++ b/libass/arm/blend_bitmaps.S
@@ -0,0 +1,67 @@
+/*
+ * void add_bitmaps( uint8_t *dst, intptr_t dst_stride,
+ *                   uint8_t *src, intptr_t src_stride,
+ *                   intptr_t width, intptr_t height );
+*/
+
+#include "asm.S"
+
+.macro blend_func type
+function \type\()_bitmaps_arm
+    push    {r4-r7}                 @ 16 bytes pushed, stack args now start at sp+16
+    ldrd    r4, r5, [sp, #16]       @ r4 = width, r5 = height
+@.Lskip_prologue:
+    mla     r5, r5, r3, r2          @ r5 = src + height * src_stride (end of src)
+0: @ height loop
+    mov     r6, #0 @ x offset
+1: @ stride loop
+    ldr     r7, [r0, r6]            @ four dst bytes
+    ldr     ip, [r2, r6]            @ four src bytes
+    uq\type\()8 ip, r7, ip          @ per-byte unsigned saturating dst (add or sub) src
+    str     ip, [r0, r6]
+    add     r6, #4                  @ four pixels per iteration
+    cmp     r6, r4
+    blo     1b @ still in scan line
+    add     r0, r1                  @ next dst row
+    add     r2, r3                  @ next src row
+    cmp     r2, r5
+    blo     0b
+    pop     {r4-r7}
+    bx      lr
+endfunc
+.endm
+
+blend_func add
+blend_func sub
+
+/*
+ * void mul_bitmaps( uint8_t *dst, intptr_t dst_stride,
+ *                   uint8_t *src1, intptr_t src1_stride,
+ *                   uint8_t *src2, intptr_t src2_stride,
+ *                   intptr_t width, intptr_t height );
+*/
+function mul_bitmaps_arm
+    push    {r4-r11}                @ save callee-saved regs before any of them is written
+    mov     r11, #255               @ constant added to every product below
+    ldrd    r4, r5, [sp, #32]       @ r4 = src2, r5 = src2_stride
+    ldrd    r6, r7, [sp, #40]       @ r6 = width, r7 = height
+    mla     r7, r7, r3, r2 @ last address
+0: @ height loop
+    mov     r8, #0 @ x offset
+1: @ stride loop
+    ldrb    r9, [r2, r8]            @ src1 pixel
+    ldrb    r10, [r4, r8]           @ src2 pixel
+    mla     r9, r9, r10, r11        @ r9 = src1 * src2 + 255
+    asr     r9, r9, #8              @ scale the product back to 8 bits
+    strb    r9, [r0, r8]
+    add     r8, r8, #1
+    cmp     r8, r6
+    blo     1b @ still in scan line
+    add     r0, r1                  @ next dst row
+    add     r2, r3                  @ next src1 row
+    add     r4, r5                  @ next src2 row
+    cmp     r2, r7
+    blo     0b
+    pop     {r4-r11}
+    bx      lr
+endfunc
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index 73620ddf..083b0e24 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -41,7 +41,8 @@
 #undef ALIGN
 #undef DECORATE

-#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+#if CONFIG_ASM
+#if (defined(__i386__) || defined(__x86_64__))

 #define ALIGN 4
 #define DECORATE(func) ass_##func##_sse2
@@ -54,7 +55,16 @@
 #include "ass_func_template.h"
 #undef ALIGN
 #undef DECORATE
+#elif defined(__arm__)
+#define ALIGN 5
+#define DECORATE(func) ass_##func##_arm // mostly not ported yet
+#define DECORATE2(func) ass_##func##_arm // mostly not ported yet
+#include "ass_func_template.h"
+#undef ALIGN
+#undef DECORATE

+
+#endif
 #endif


diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index e4c1451c..84a2bc87 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -96,6 +96,7 @@ typedef struct {
 extern const BitmapEngine ass_bitmap_engine_c;
 extern const BitmapEngine ass_bitmap_engine_sse2;
 extern const BitmapEngine ass_bitmap_engine_avx2;
+extern const BitmapEngine ass_bitmap_engine_arm;


 typedef struct {
diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h
index 0eccdfb7..6f4755ea 100644
--- a/libass/ass_func_template.h
+++ b/libass/ass_func_template.h
@@ -93,10 +93,18 @@ void DECORATE(blur1246_vert)(int16_t *dst, const int16_t *src,
                              uintptr_t src_width, uintptr_t src_height,
                              const int16_t *param);

-
 const BitmapEngine DECORATE(bitmap_engine) = {
     .align_order = ALIGN,

+#ifdef __arm__
+#undef DECORATE
+#define DECORATE(func) ass_##func##_c
+#endif
+
+#ifndef DECORATE2
+#define DECORATE2(x) DECORATE(x)
+#endif
+
 #if CONFIG_RASTERIZER
 #if CONFIG_LARGE_TILES
     .tile_order = 5,
@@ -111,10 +119,10 @@ const BitmapEngine DECORATE(bitmap_engine) = {
 #endif
 #endif

-    .add_bitmaps = DECORATE(add_bitmaps),
-#ifdef __x86_64__
-    .sub_bitmaps = DECORATE(sub_bitmaps),
-    .mul_bitmaps = DECORATE(mul_bitmaps),
+    .add_bitmaps = DECORATE2(add_bitmaps),
+#if defined(__x86_64__) || defined(__arm__)
+    .sub_bitmaps = DECORATE2(sub_bitmaps),
+    .mul_bitmaps = DECORATE2(mul_bitmaps),
 #else
     .sub_bitmaps = ass_sub_bitmaps_c,
     .mul_bitmaps = ass_mul_bitmaps_c,
@@ -139,3 +147,5 @@ const BitmapEngine DECORATE(bitmap_engine) = {
     .main_blur_horz = { DECORATE(blur1234_horz), DECORATE(blur1235_horz), DECORATE(blur1246_horz) },
     .main_blur_vert = { DECORATE(blur1234_vert), DECORATE(blur1235_vert), DECORATE(blur1246_vert) },
 };
+
+#undef DECORATE2
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 77c7e304..42dbf6e7 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -70,6 +70,8 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library)
         priv->engine = &ass_bitmap_engine_sse2;
     else
         priv->engine = &ass_bitmap_engine_c;
+#elif defined(__arm__) && CONFIG_ASM
+    priv->engine = &ass_bitmap_engine_arm;
 #else
     priv->engine = &ass_bitmap_engine_c;
 #endif
-- 
cgit v1.2.3