author     Rodger Combs <rodger.combs@gmail.com>    2016-01-31 02:14:10 -0600
committer  Rodger Combs <rodger.combs@gmail.com>    2016-07-15 06:01:50 -0500
commit     2f4012036f9ab552deacf27af2df5cbdc3b4a067 (patch)
tree       1630f652c19bab1d796b98a5e6c38834e0132133
parent     748d479107b1e19a4f52f0f6acb47f78b2260430 (diff)
download   libass-2f4012036f9ab552deacf27af2df5cbdc3b4a067.tar.bz2
           libass-2f4012036f9ab552deacf27af2df5cbdc3b4a067.tar.xz
Swap width and height args to add/sub blend functions for consistency
-rw-r--r--  libass/ass_bitmap.c           |    4
-rw-r--r--  libass/ass_bitmap.h           |    2
-rw-r--r--  libass/ass_func_template.h    |    4
-rw-r--r--  libass/ass_render.c           |    6
-rw-r--r--  libass/x86/blend_bitmaps.asm  |  100
5 files changed, 58 insertions, 58 deletions
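
The whole change is an argument-order swap at the blend-function boundary: each add/sub blend entry point now takes width before height. A minimal C sketch of the old and new conventions; the BitmapBlendFunc name matches the ass_bitmap.h hunk below, while the *Old variant is illustrative only:

    #include <stdint.h>

    /* Old convention: ..., height, width (the order removed below) */
    typedef void (*BitmapBlendFuncOld)(uint8_t *dst, intptr_t dst_stride,
                                       uint8_t *src, intptr_t src_stride,
                                       intptr_t height, intptr_t width);

    /* New convention: ..., width, height (the order added below) */
    typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,
                                    uint8_t *src, intptr_t src_stride,
                                    intptr_t width, intptr_t height);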
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index 78894c9..73620dd 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -579,7 +579,7 @@ int outline_to_bitmap2(ASS_Renderer *render_priv,
*/
void ass_add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+ intptr_t width, intptr_t height)
{
unsigned out;
uint8_t* end = dst + dst_stride * height;
@@ -595,7 +595,7 @@ void ass_add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
void ass_sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width)
+ intptr_t width, intptr_t height)
{
short out;
uint8_t* end = dst + dst_stride * height;
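
The C fallbacks above change only in parameter order; the row loop visible in the hunks (advance dst by dst_stride until `end`) is untouched. Below is a self-contained sketch of that behaviour with the new (width, height) order. The per-pixel bodies are reconstructed as saturating add/subtract, an assumption consistent with the paddusb/psubusb instructions in the SIMD paths further down, not a copy of the file's code:

    #include <stdint.h>

    /* Sketch of a saturating add blend with the new (width, height) order.
     * Only the signature and the `end = dst + dst_stride * height` row loop
     * appear in the diff above; the clamp bodies are assumed. */
    static void add_bitmaps_ref(uint8_t *dst, intptr_t dst_stride,
                                uint8_t *src, intptr_t src_stride,
                                intptr_t width, intptr_t height)
    {
        uint8_t *end = dst + dst_stride * height;
        while (dst < end) {
            for (intptr_t x = 0; x < width; x++) {
                unsigned out = dst[x] + src[x];           /* may exceed 255 */
                dst[x] = out > 255 ? 255 : (uint8_t)out;  /* saturate up */
            }
            dst += dst_stride;
            src += src_stride;
        }
    }

    static void sub_bitmaps_ref(uint8_t *dst, intptr_t dst_stride,
                                uint8_t *src, intptr_t src_stride,
                                intptr_t width, intptr_t height)
    {
        uint8_t *end = dst + dst_stride * height;
        while (dst < end) {
            for (intptr_t x = 0; x < width; x++) {
                int out = dst[x] - src[x];                /* may go below 0 */
                dst[x] = out < 0 ? 0 : (uint8_t)out;      /* saturate down */
            }
            dst += dst_stride;
            src += src_stride;
        }
    }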
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index 71a3f5e..e4c1451 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -36,7 +36,7 @@ typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride,
typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width);
+ intptr_t width, intptr_t height);
typedef void (*BitmapMulFunc)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src1, intptr_t src1_stride,
uint8_t *src2, intptr_t src2_stride,
diff --git a/libass/ass_func_template.h b/libass/ass_func_template.h
index 6c72021..0eccdfb 100644
--- a/libass/ass_func_template.h
+++ b/libass/ass_func_template.h
@@ -33,10 +33,10 @@ void DECORATE(fill_generic_tile32)(uint8_t *buf, ptrdiff_t stride,
void DECORATE(add_bitmaps)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width);
+ intptr_t width, intptr_t height);
void DECORATE(sub_bitmaps)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src, intptr_t src_stride,
- intptr_t height, intptr_t width);
+ intptr_t width, intptr_t height);
void DECORATE(mul_bitmaps)(uint8_t *dst, intptr_t dst_stride,
uint8_t *src1, intptr_t src1_stride,
uint8_t *src2, intptr_t src2_stride,
diff --git a/libass/ass_render.c b/libass/ass_render.c
index f124114..77c7e30 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -567,7 +567,7 @@ static void blend_vector_clip(ASS_Renderer *render_priv,
memcpy(nbuffer, abuffer, ((ah - 1) * as) + aw);
render_priv->engine->sub_bitmaps(nbuffer + atop * as + aleft, as,
bbuffer + btop * bs + bleft, bs,
- h, w);
+ w, h);
} else {
// Regular clip
if (ax + aw < bx || ay + ah < by || ax > bx + bw ||
@@ -2319,7 +2319,7 @@ static void render_and_combine_glyphs(ASS_Renderer *render_priv,
unsigned char *buf = dst->buffer + y * dst->stride + x;
render_priv->engine->add_bitmaps(buf, dst->stride,
src->buffer, src->stride,
- src->h, src->w);
+ src->w, src->h);
}
}
}
@@ -2353,7 +2353,7 @@ static void render_and_combine_glyphs(ASS_Renderer *render_priv,
unsigned char *buf = dst->buffer + y * dst->stride + x;
render_priv->engine->add_bitmaps(buf, dst->stride,
src->buffer, src->stride,
- src->h, src->w);
+ src->w, src->h);
}
}
}
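
Both render_and_combine_glyphs call sites above follow the same pattern: offset the destination pointer to the target (x, y) and pass the source bitmap's own width and height, now in (w, h) order. A sketch of that pattern; ExampleBitmap and composite_at are hypothetical names standing in for libass's Bitmap struct and the engine->add_bitmaps call:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical minimal bitmap type, for illustration only. */
    typedef struct {
        uint8_t  *buffer;
        ptrdiff_t stride;
        int       w, h;
    } ExampleBitmap;

    typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,
                                    uint8_t *src, intptr_t src_stride,
                                    intptr_t width, intptr_t height);

    /* Composite src into dst at (x, y), mirroring the updated call sites:
     * the last two arguments are now src->w, src->h (width, then height). */
    static void composite_at(BitmapBlendFunc add_bitmaps,
                             ExampleBitmap *dst, const ExampleBitmap *src,
                             int x, int y)
    {
        uint8_t *buf = dst->buffer + (ptrdiff_t)y * dst->stride + x;
        add_bitmaps(buf, dst->stride, src->buffer, src->stride,
                    src->w, src->h);
    }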
diff --git a/libass/x86/blend_bitmaps.asm b/libass/x86/blend_bitmaps.asm
index 3a9b2dd..118feea 100644
--- a/libass/x86/blend_bitmaps.asm
+++ b/libass/x86/blend_bitmaps.asm
@@ -29,16 +29,16 @@ SECTION .text
;------------------------------------------------------------------------------
; void add_bitmaps( uint8_t *dst, intptr_t dst_stride,
; uint8_t *src, intptr_t src_stride,
-; intptr_t height, intptr_t width );
+; intptr_t width, intptr_t height );
;------------------------------------------------------------------------------
INIT_XMM
cglobal add_bitmaps_x86, 6,7
.skip_prologue:
- imul r4, r3
- add r4, r2
- PUSH r4
- mov r4, r3
+ imul r5, r3
+ add r5, r2
+ PUSH r5
+ mov r5, r3
.height_loop:
xor r6, r6 ; x offset
.stride_loop:
@@ -49,10 +49,10 @@ cglobal add_bitmaps_x86, 6,7
.continue:
mov byte [r0 + r6], r3b
inc r6
- cmp r6, r5
+ cmp r6, r4
jl .stride_loop ; still in scan line
add r0, r1
- add r2, r4
+ add r2, r5
cmp r2, [rsp]
jl .height_loop
ADD rsp, gprsize
@@ -61,7 +61,7 @@ cglobal add_bitmaps_x86, 6,7
%macro ADD_BITMAPS 0
cglobal add_bitmaps, 6,7
.skip_prologue:
- cmp r5, mmsize
+ cmp r4, mmsize
%if mmsize == 16
jl add_bitmaps_x86.skip_prologue
%else
@@ -70,20 +70,20 @@ cglobal add_bitmaps_x86, 6,7
%if mmsize == 32
vzeroupper
%endif
- imul r4, r3
- add r4, r2 ; last address
+ imul r5, r3
+ add r5, r2 ; last address
.height_loop:
xor r6, r6 ; x offset
.stride_loop:
- movu m0, [r0 + r6]
- paddusb m0, [r2 + r6]
- movu [r0 + r6], m0
+ movu m1, [r0 + r6]
+ paddusb m1, [r2 + r6]
+ movu [r0 + r6], m1
add r6, mmsize
- cmp r6, r5
+ cmp r6, r4
jl .stride_loop ; still in scan line
add r0, r1
add r2, r3
- cmp r2, r4
+ cmp r2, r5
jl .height_loop
RET
%endmacro
@@ -96,16 +96,16 @@ ADD_BITMAPS
;------------------------------------------------------------------------------
; void sub_bitmaps( uint8_t *dst, intptr_t dst_stride,
; uint8_t *src, intptr_t src_stride,
-; intptr_t height, intptr_t width );
+; intptr_t width, intptr_t height );
;------------------------------------------------------------------------------
INIT_XMM
cglobal sub_bitmaps_x86, 6,10
.skip_prologue:
- imul r4, r3
- add r4, r2 ; last address
- PUSH r4
- mov r4, r3
+ imul r5, r3
+ add r5, r2 ; last address
+ PUSH r5
+ mov r5, r3
.height_loop:
xor r6, r6 ; x offset
.stride_loop:
@@ -116,10 +116,10 @@ cglobal sub_bitmaps_x86, 6,10
.continue:
mov byte [r0 + r6], r3b
inc r6
- cmp r6, r5
+ cmp r6, r4
jl .stride_loop ; still in scan line
add r0, r1
- add r2, r4
+ add r2, r5
cmp r2, [rsp]
jl .height_loop
ADD rsp, gprsize
@@ -130,7 +130,7 @@ cglobal sub_bitmaps_x86, 6,10
%macro SUB_BITMAPS 0
cglobal sub_bitmaps, 6,10
.skip_prologue:
- cmp r5, mmsize
+ cmp r4, mmsize
%if mmsize == 16
jl sub_bitmaps_x86.skip_prologue
%else
@@ -139,23 +139,23 @@ cglobal sub_bitmaps_x86, 6,10
%if mmsize == 32
vzeroupper
%endif
- imul r4, r3
- add r4, r2 ; last address
- mov r7, r5
+ imul r5, r3
+ add r5, r2 ; last address
+ mov r7, r4
and r7, -mmsize ; &= (16);
xor r9, r9
.height_loop:
xor r6, r6 ; x offset
.stride_loop:
- movu m0, [r0 + r6]
- movu m1, [r2 + r6]
- psubusb m0, m1
- movu [r0 + r6], m0
+ movu m1, [r0 + r6]
+ movu m2, [r2 + r6]
+ psubusb m1, m2
+ movu [r0 + r6], m1
add r6, mmsize
cmp r6, r7
jl .stride_loop ; still in scan line
.stride_loop2
- cmp r6, r5
+ cmp r6, r4
jge .finish
movzx r8, byte [r0 + r6]
sub r8b, byte [r2 + r6]
@@ -166,7 +166,7 @@ cglobal sub_bitmaps_x86, 6,10
.finish
add r0, r1
add r2, r3
- cmp r2, r4
+ cmp r2, r5
jl .height_loop
RET
%endmacro
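
After the swap, the SIMD sub_bitmaps keeps the row width in r4 and the precomputed source end address in r5; r7 holds width & -mmsize, so each row is processed as whole vectors in .stride_loop and finished byte-by-byte in .stride_loop2. A C-style control-flow sketch of that split, where VEC_WIDTH stands in for mmsize and the scalar body stands in for psubusb:

    #include <stdint.h>

    /* Control-flow sketch of the vector/tail split in sub_bitmaps.
     * VEC_WIDTH stands in for mmsize (16 for SSE2, 32 for AVX2). */
    enum { VEC_WIDTH = 16 };

    static void sub_bitmaps_sketch(uint8_t *dst, intptr_t dst_stride,
                                   uint8_t *src, intptr_t src_stride,
                                   intptr_t width, intptr_t height)
    {
        uint8_t *end = src + src_stride * height;            /* "last address" (r5) */
        intptr_t vec_width = width & -(intptr_t)VEC_WIDTH;   /* r7 = width & -mmsize */

        while (src < end) {
            intptr_t x = 0;
            /* .stride_loop: whole vectors; each x step models one psubusb */
            for (; x < vec_width; x += VEC_WIDTH)
                for (intptr_t i = 0; i < VEC_WIDTH; i++) {
                    int d = dst[x + i] - src[x + i];
                    dst[x + i] = d < 0 ? 0 : (uint8_t)d;
                }
            /* .stride_loop2: scalar tail for the remaining width % VEC_WIDTH bytes */
            for (; x < width; x++) {
                int d = dst[x] - src[x];
                dst[x] = d < 0 ? 0 : (uint8_t)d;
            }
            dst += dst_stride;
            src += src_stride;
        }
    }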
@@ -221,15 +221,15 @@ cglobal mul_bitmaps, 8,12
.height_loop:
xor r8, r8 ; x offset
.stride_loop:
- movq xmm0, [r2 + r8]
- movq xmm1, [r4 + r8]
- punpcklbw xmm0, xmm2
+ movq xmm1, [r2 + r8]
+ movq xmm2, [r4 + r8]
punpcklbw xmm1, xmm2
- pmullw xmm0, xmm1
- paddw xmm0, xmm3
- psrlw xmm0, 0x08
- packuswb xmm0, xmm0
- movq [r0 + r8], xmm0
+ punpcklbw xmm2, xmm2
+ pmullw xmm1, xmm2
+ paddw xmm1, xmm3
+ psrlw xmm1, 0x08
+ packuswb xmm1, xmm1
+ movq [r0 + r8], xmm1
add r8, 8
cmp r8, r9
jl .stride_loop ; still in scan line
@@ -268,18 +268,18 @@ cglobal mul_bitmaps, 8,12
.height_loop:
xor r8, r8 ; x offset
.stride_loop:
- vmovdqu xmm0, [r2 + r8]
- vpermq ymm0, ymm0, 0x10
- vmovdqu xmm1, [r4 + r8]
+ vmovdqu xmm1, [r2 + r8]
vpermq ymm1, ymm1, 0x10
- vpunpcklbw ymm0, ymm0, ymm2
+ vmovdqu xmm2, [r4 + r8]
+ vpermq ymm2, ymm2, 0x10
vpunpcklbw ymm1, ymm1, ymm2
- vpmullw ymm0, ymm0, ymm1
- vpaddw ymm0, ymm0, ymm3
- vpsrlw ymm0, ymm0, 0x08
- vextracti128 xmm4, ymm0, 0x1
- vpackuswb ymm0, ymm0, ymm4
- vmovdqa [r0 + r8], xmm0
+ vpunpcklbw ymm2, ymm2, ymm2
+ vpmullw ymm1, ymm1, ymm2
+ vpaddw ymm1, ymm1, ymm3
+ vpsrlw ymm1, ymm1, 0x08
+ vextracti128 xmm4, ymm1, 0x1
+ vpackuswb ymm1, ymm1, ymm4
+ vmovdqa [r0 + r8], xmm1
add r8, 16
cmp r8, r9
jl .stride_loop ; still in scan line
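
The mul_bitmaps hunks above renumber SIMD registers around the same pmullw, paddw (with the bias held in xmm3/ymm3), psrlw-by-8 and pack-to-bytes sequence; per pixel that amounts to roughly dst = (src1 * src2 + bias) >> 8. A scalar sketch of that operation; the bias value 255 and the (width, height) parameter order are assumptions, since neither the constant load nor the full prototype appears in the hunks shown:

    #include <stdint.h>

    /* Scalar sketch of the per-pixel operation in mul_bitmaps, inferred from
     * the pmullw / paddw / psrlw 8 / packuswb sequence above.  The bias 255 is
     * an assumption: the constant loaded into xmm3/ymm3 is outside these hunks.
     * The (width, height) order is assumed to match the other blend functions. */
    static void mul_bitmaps_sketch(uint8_t *dst, intptr_t dst_stride,
                                   const uint8_t *src1, intptr_t src1_stride,
                                   const uint8_t *src2, intptr_t src2_stride,
                                   intptr_t width, intptr_t height)
    {
        for (intptr_t y = 0; y < height; y++) {
            for (intptr_t x = 0; x < width; x++)
                dst[x] = (uint8_t)((src1[x] * src2[x] + 255) >> 8);  /* bias assumed */
            dst  += dst_stride;
            src1 += src1_stride;
            src2 += src2_stride;
        }
    }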