summary refs log tree commit diff stats
path: root/libass/x86/blend_bitmaps.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libass/x86/blend_bitmaps.asm')
-rw-r--r-- libass/x86/blend_bitmaps.asm 44
1 files changed, 39 insertions, 5 deletions
diff --git a/libass/x86/blend_bitmaps.asm b/libass/x86/blend_bitmaps.asm
index 7bde78a1..63b23449 100644
--- a/libass/x86/blend_bitmaps.asm
+++ b/libass/x86/blend_bitmaps.asm
@@ -18,17 +18,48 @@
;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;******************************************************************************
-%include "x86/x86inc.asm"
+%include "x86/utils.asm"
SECTION_RODATA 32
+%if ARCH_X86_64 || !PIC ; constant tables are emitted only for x86-64 or non-PIC builds
times 32 db 0xFF ; 32 bytes of 0xFF laid out immediately before edge_mask
edge_mask: times 32 db 0x00 ; 32 zero bytes; LOAD_EDGE_MASK loads a window that straddles this label
words_255: times 16 dw 0xFF ; sixteen 16-bit words, each holding 255
+%endif
SECTION .text
;------------------------------------------------------------------------------
+; LOAD_EDGE_MASK 1:m_dst, 2:n, 3:tmp
+; Set the last n bytes of the xmm/ymm register m_dst to zero and all other bytes to 255
+;------------------------------------------------------------------------------
+
+%macro LOAD_EDGE_MASK 3 ; %1 = index of destination vector register, %2 = n, %3 = scratch register (unused in some branches)
+%if !PIC ; non-PIC build: the table can be addressed directly
+ movu m%1, [edge_mask + %2 - mmsize] ; window ending n bytes past edge_mask: (mmsize - n) bytes of 0xFF, then n zero bytes
+%elif ARCH_X86_64 ; PIC on x86-64: fetch the table address RIP-relatively first
+ lea %3, [rel edge_mask] ; tmp = address of edge_mask
+ movu m%1, [%3 + %2 - mmsize] ; same window as the non-PIC load, based on tmp
+%elif mmsize <= STACK_ALIGNMENT ; 32-bit PIC: no table is emitted, so the mask is built in scratch memory below rsp
+ %assign %%pad -(stack_offset + gprsize) & (mmsize - 1) ; presumably the distance from rsp down to an mmsize-aligned address, derived from x86inc's stack_offset -- TODO confirm
+ pxor m%1, m%1 ; m_dst = all zero bytes
+ mova [rsp - %%pad - mmsize], m%1 ; upper scratch slot <- zeros
+ pcmpeqb m%1, m%1 ; m_dst = all 0xFF bytes (every byte compares equal to itself)
+ mova [rsp - %%pad - 2 * mmsize], m%1 ; lower scratch slot, directly below the zeros <- 0xFF
+ movu m%1, [rsp + %2 - %%pad - 2 * mmsize] ; load starting n bytes into the 0xFF slot: (mmsize - n) x 0xFF, then n x 0x00
+%else ; vector wider than STACK_ALIGNMENT: compute an aligned base in tmp at run time
+ mov %3, rsp ; tmp = current stack pointer
+ and %3, -mmsize ; clear the low bits so tmp is a multiple of mmsize (mmsize assumed to be a power of two)
+ pxor m%1, m%1 ; m_dst = all zero bytes
+ mova [%3 - mmsize], m%1 ; upper scratch slot <- zeros
+ pcmpeqb m%1, m%1 ; m_dst = all 0xFF bytes
+ mova [%3 - 2 * mmsize], m%1 ; lower scratch slot <- 0xFF
+ movu m%1, [%3 + %2 - 2 * mmsize] ; load starting n bytes into the 0xFF slot, as in the branch above
+%endif ; NOTE(review): both stack branches write below rsp without adjusting it -- confirm nothing can overwrite that area between the stores and the load
+%endmacro
+
+;------------------------------------------------------------------------------
; BLEND_BITMAPS 1:add/sub
; void add_bitmaps(uint8_t *dst, intptr_t dst_stride,
; uint8_t *src, intptr_t src_stride,
@@ -51,8 +82,7 @@ cglobal %1_bitmaps, 5,7,3
neg r4
mov r6, r4
and r4, mmsize - 1
- lea t0, [edge_mask]
- movu m2, [t0 + r4 - mmsize]
+ LOAD_EDGE_MASK 2, r4, t0
%if !ARCH_X86_64
mov r5, r5m
%endif
@@ -113,9 +143,13 @@ cglobal mul_bitmaps, 1,7,7
neg r6
mov t0, r6
and r6, mmsize - 1
- lea t1, [edge_mask]
- movu m4, [t1 + r6 - mmsize]
+ LOAD_EDGE_MASK 4, r6, t1
+%if ARCH_X86_64 || !PIC
mova m5, [words_255]
+%else
+ mov t1d, 255 * 0x10001
+ BCASTD 5, t1d
+%endif
pxor m6, m6
mov t1, r7m
imul t1, r5