summary | refs | log | tree | commit | diff | stats
path: root/libass/x86
diff options
context:
space:
mode:
author Dr.Smile <vabnick@gmail.com> 2015-02-09 05:01:55 +0300
committer Dr.Smile <vabnick@gmail.com> 2015-02-09 05:01:55 +0300
commit 820b5a85f9bc56ac3361ff546331f0b318570005 (patch)
tree dfc46feee5364181178dc834e7ae22a268ea92e9 /libass/x86
parent 82a5bb2040ec63aec11043d5249bac68eed6632d (diff)
download libass-820b5a85f9bc56ac3361ff546331f0b318570005.tar.bz2
libass-820b5a85f9bc56ac3361ff546331f0b318570005.tar.xz
Skip memset() when using internal rasterizer
Diffstat (limited to 'libass/x86')
-rw-r--r-- libass/x86/rasterizer.asm 15
-rw-r--r-- libass/x86/rasterizer.h 8
2 files changed, 16 insertions, 7 deletions
diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm
index d4941c6d..bbb2921e 100644
--- a/libass/x86/rasterizer.asm
+++ b/libass/x86/rasterizer.asm
@@ -116,12 +116,21 @@ SECTION .text
;------------------------------------------------------------------------------
; FILL_SOLID_TILE tile_order, suffix
-; void fill_solid_tile%2(uint8_t *buf, ptrdiff_t stride);
+; void fill_solid_tile%2(uint8_t *buf, ptrdiff_t stride, int set);
;------------------------------------------------------------------------------
%macro FILL_SOLID_TILE 2
-cglobal fill_solid_tile%2, 2,2,1
- pcmpeqd m0, m0
+cglobal fill_solid_tile%2, 3,4,1
+ mov r3d, -1
+ test r2d, r2d
+ cmovnz r2d, r3d
+ movd xm0, r2d
+%if mmsize == 32
+ vpbroadcastd m0, xm0
+%else
+ pshufd m0, m0, q0000
+%endif
+
%rep (1 << %1) - 1
FILL_LINE r0, 0, 1 << %1
add r0, r1
diff --git a/libass/x86/rasterizer.h b/libass/x86/rasterizer.h
index 11ea3d4f..47d72d45 100644
--- a/libass/x86/rasterizer.h
+++ b/libass/x86/rasterizer.h
@@ -25,8 +25,8 @@
struct segment;
-void ass_fill_solid_tile16_sse2(uint8_t *buf, ptrdiff_t stride);
-void ass_fill_solid_tile32_sse2(uint8_t *buf, ptrdiff_t stride);
+void ass_fill_solid_tile16_sse2(uint8_t *buf, ptrdiff_t stride, int set);
+void ass_fill_solid_tile32_sse2(uint8_t *buf, ptrdiff_t stride, int set);
void ass_fill_halfplane_tile16_sse2(uint8_t *buf, ptrdiff_t stride,
int32_t a, int32_t b, int64_t c, int32_t scale);
void ass_fill_halfplane_tile32_sse2(uint8_t *buf, ptrdiff_t stride,
@@ -38,8 +38,8 @@ void ass_fill_generic_tile32_sse2(uint8_t *buf, ptrdiff_t stride,
const struct segment *line, size_t n_lines,
int winding);
-void ass_fill_solid_tile16_avx2(uint8_t *buf, ptrdiff_t stride);
-void ass_fill_solid_tile32_avx2(uint8_t *buf, ptrdiff_t stride);
+void ass_fill_solid_tile16_avx2(uint8_t *buf, ptrdiff_t stride, int set);
+void ass_fill_solid_tile32_avx2(uint8_t *buf, ptrdiff_t stride, int set);
void ass_fill_halfplane_tile16_avx2(uint8_t *buf, ptrdiff_t stride,
int32_t a, int32_t b, int64_t c, int32_t scale);
void ass_fill_halfplane_tile32_avx2(uint8_t *buf, ptrdiff_t stride,