diff options
author | Dr.Smile <vabnick@gmail.com> | 2021-03-15 09:50:46 +0300 |
---|---|---|
committer | Dr.Smile <vabnick@gmail.com> | 2021-04-21 21:46:09 +0300 |
commit | 045e17d5dc468218e4c9e8a1eb862d7b132b080a (patch) | |
tree | 54e9438690a1bd7877350008f616b8744915aafa | |
parent | d20d4ee08fec4db7ca771ec8d0d7ee190bbbf1a3 (diff) | |
download | libass-045e17d5dc468218e4c9e8a1eb862d7b132b080a.tar.bz2 libass-045e17d5dc468218e4c9e8a1eb862d7b132b080a.tar.xz |
rasterizer: make C and assembly functions bitwise identical
Fixes https://github.com/libass/libass/issues/475
-rw-r--r-- | libass/ass_rasterizer_c.c | 10 |
-rw-r--r-- | libass/x86/rasterizer.asm | 17 |
2 files changed, 15 insertions, 12 deletions
diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c index 7e33231..6c81695 100644 --- a/libass/ass_rasterizer_c.c +++ b/libass/ass_rasterizer_c.c @@ -86,14 +86,15 @@ void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride, va2[x] = aa * x + delta; } - static const int16_t full = (1 << 10) - 1; + static const int16_t full = 1 << 10; for (int y = 0; y < 16; y++) { for (int x = 0; x < 16; x++) { int16_t c1 = cc - va1[x]; int16_t c2 = cc - va2[x]; c1 = FFMINMAX(c1, 0, full); c2 = FFMINMAX(c2, 0, full); - buf[x] = (c1 + c2) >> 3; + int16_t res = (c1 + c2) >> 3; + buf[x] = FFMIN(res, 255); } buf += stride; cc -= bb; @@ -118,14 +119,15 @@ void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride, va2[x] = aa * x + delta; } - static const int16_t full = (1 << 9) - 1; + static const int16_t full = 1 << 9; for (int y = 0; y < 32; y++) { for (int x = 0; x < 32; x++) { int16_t c1 = cc - va1[x]; int16_t c2 = cc - va2[x]; c1 = FFMINMAX(c1, 0, full); c2 = FFMINMAX(c2, 0, full); - buf[x] = (c1 + c2) >> 2; + int16_t res = (c1 + c2) >> 2; + buf[x] = FFMIN(res, 255); } buf += stride; cc -= bb; diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm index ed4c8df..2655f12 100644 --- a/libass/x86/rasterizer.asm +++ b/libass/x86/rasterizer.asm @@ -403,17 +403,18 @@ endstruc imul %7d, %4d ; sum * b sar %7d, 7 ; avg * b - add %7d, %9d ; avg * b + dc - add %9d, %9d ; 2 * dc + sub %7d, %9d ; avg * b - dc + lea %9d, [%7d + 2 * %9d] ; avg * b + dc imul %7d, %8d - sar %7d, 16 - sub %7d, %6d ; -offs1 - BCASTW %10, %7d imul %9d, %8d - sar %9d, 16 ; offs2 - offs1 - BCASTW %11, %9d + sar %7d, 16 + sar %9d, 16 + sub %7d, %9d ; offs1 - offs2 + sub %9d, %6d ; -offs1 add %6d, %6d + BCASTW %11, %7d + BCASTW %10, %9d BCASTW %12, %6d %assign %%i 0 @@ -428,7 +429,7 @@ endstruc pmulhw m%13, mm_c, m%14 %endif psubw m%13, m%10 ; c1 - paddw m%14, m%13, m%11 ; c2 + psubw m%14, m%13, m%11 ; c2 pmaxsw m%13, mm_zero pmaxsw m%14, mm_zero pminsw m%13, m%12 |