summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dr.Smile <vabnick@gmail.com> 2021-03-15 09:50:46 +0300
committer: Dr.Smile <vabnick@gmail.com> 2021-04-21 21:46:09 +0300
commit: 045e17d5dc468218e4c9e8a1eb862d7b132b080a (patch)
tree: 54e9438690a1bd7877350008f616b8744915aafa
parent: d20d4ee08fec4db7ca771ec8d0d7ee190bbbf1a3 (diff)
download: libass-045e17d5dc468218e4c9e8a1eb862d7b132b080a.tar.bz2
libass-045e17d5dc468218e4c9e8a1eb862d7b132b080a.tar.xz
rasterizer: make C and assembly functions bitwise identical
Fixes https://github.com/libass/libass/issues/475
-rw-r--r-- libass/ass_rasterizer_c.c 10
-rw-r--r-- libass/x86/rasterizer.asm 17
2 files changed, 15 insertions, 12 deletions
diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c
index 7e33231..6c81695 100644
--- a/libass/ass_rasterizer_c.c
+++ b/libass/ass_rasterizer_c.c
@@ -86,14 +86,15 @@ void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride,
va2[x] = aa * x + delta;
}
- static const int16_t full = (1 << 10) - 1;
+ static const int16_t full = 1 << 10;
for (int y = 0; y < 16; y++) {
for (int x = 0; x < 16; x++) {
int16_t c1 = cc - va1[x];
int16_t c2 = cc - va2[x];
c1 = FFMINMAX(c1, 0, full);
c2 = FFMINMAX(c2, 0, full);
- buf[x] = (c1 + c2) >> 3;
+ int16_t res = (c1 + c2) >> 3;
+ buf[x] = FFMIN(res, 255);
}
buf += stride;
cc -= bb;
@@ -118,14 +119,15 @@ void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride,
va2[x] = aa * x + delta;
}
- static const int16_t full = (1 << 9) - 1;
+ static const int16_t full = 1 << 9;
for (int y = 0; y < 32; y++) {
for (int x = 0; x < 32; x++) {
int16_t c1 = cc - va1[x];
int16_t c2 = cc - va2[x];
c1 = FFMINMAX(c1, 0, full);
c2 = FFMINMAX(c2, 0, full);
- buf[x] = (c1 + c2) >> 2;
+ int16_t res = (c1 + c2) >> 2;
+ buf[x] = FFMIN(res, 255);
}
buf += stride;
cc -= bb;
diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm
index ed4c8df..2655f12 100644
--- a/libass/x86/rasterizer.asm
+++ b/libass/x86/rasterizer.asm
@@ -403,17 +403,18 @@ endstruc
imul %7d, %4d ; sum * b
sar %7d, 7 ; avg * b
- add %7d, %9d ; avg * b + dc
- add %9d, %9d ; 2 * dc
+ sub %7d, %9d ; avg * b - dc
+ lea %9d, [%7d + 2 * %9d] ; avg * b + dc
imul %7d, %8d
- sar %7d, 16
- sub %7d, %6d ; -offs1
- BCASTW %10, %7d
imul %9d, %8d
- sar %9d, 16 ; offs2 - offs1
- BCASTW %11, %9d
+ sar %7d, 16
+ sar %9d, 16
+ sub %7d, %9d ; offs1 - offs2
+ sub %9d, %6d ; -offs1
add %6d, %6d
+ BCASTW %11, %7d
+ BCASTW %10, %9d
BCASTW %12, %6d
%assign %%i 0
@@ -428,7 +429,7 @@ endstruc
pmulhw m%13, mm_c, m%14
%endif
psubw m%13, m%10 ; c1
- paddw m%14, m%13, m%11 ; c2
+ psubw m%14, m%13, m%11 ; c2
pmaxsw m%13, mm_zero
pmaxsw m%14, mm_zero
pminsw m%13, m%12