diff options
author | Oleg Oshmyan <chortos@inbox.lv> | 2023-01-21 18:57:40 +0200 |
---|---|---|
committer | Oleg Oshmyan <chortos@inbox.lv> | 2023-02-09 16:17:43 +0200 |
commit | 5ef685789cce1d48223339709c5e9b80292724d6 (patch) | |
tree | 2ebf7d902cccbcc7a6c10a4c9737bd79e66c7b6e | |
parent | 2d263786fade9581fda4e03d5c8f19a870fd89fb (diff) | |
download | libass-5ef685789cce1d48223339709c5e9b80292724d6.tar.bz2 libass-5ef685789cce1d48223339709c5e9b80292724d6.tar.xz |
Bidi: take into account all kinds of bidi paragraph separators
Bidi is done one "paragraph" at a time, delimited by code points
that have Bidi_Class=Paragraph_Separator in UCD DerivedBidiClass.txt:
https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt
Each bidi "paragraph" has its own (single) base direction, and all
explicit directional embeddings, overrides and isolates are always
terminated at the end of the paragraph.
FriBidi's text analysis function, fribidi_get_par_embedding_levels (or its _ex variant),
expects a single paragraph as input. In practice, this means it stops
processing explicit overrides etc. when it encounters the nearest
paragraph separator.
We gave FriBidi lines of text delimited only by U+000A, so each such line could span
multiple bidi paragraphs and produce unexpected output. The fix is to
detect all paragraph separators recognized by FriBidi and split paragraphs
on them.
We only actually break lines either on U+000A (and softly on U+0020) in
VSFilter-compatible mode, or as dictated by the Unicode line breaking algorithm
with ASS_FEATURE_WRAP_UNICODE, but even the latter does not, in fact,
break lines on all of the code points that the bidirectional algorithm
recognizes as "paragraph separators" (U+001C..1E: for line breaking, they
are treated as combining marks and effectively merged into the preceding
character; note that the line breaking algorithm also has its own,
incompatible, definition of a "paragraph"). So it is possible that a bidi
"paragraph" starts and ends in the middle of a line, and our reordering
loop must stop at *both* line breaks and bidi-paragraph breaks for the
output to make any amount of sense.
The observable symptoms fixed by this patch are:
* When any of U+000D, U+001C..1E, U+0085, U+2029 occur in the text,
explicit directional overrides etc. work improperly from that point
until the next \N. (It seems they act as strong characters themselves,
breaking up the surrounding run if appropriate, but they do not
establish any embedding for the text contained within.)
* With ASS_FEATURE_WHOLE_TEXT (including Encoding -1), base direction is
not resolved separately for text that follows those same code points.
This is especially visible with ASS_FEATURE_WRAP_UNICODE, which breaks
lines on U+000D, U+0085 and U+2029.
I have verified that the new rendering with the ASS_Features disabled
matches VSFilter (GDI/Uniscribe), other than the glyphs with which each special
character itself is displayed, which is unrelated to bidi.
(For what it is worth, we currently display tofu boxes for all of these
special characters, whereas in VSFilter U+000B..C appear as dingbats,
the non-ASCII code points seem to intermittently be invisible or display
tofu, and I have not tested U+000D.)
-rw-r--r-- | libass/ass_shaper.c | 37 |
1 files changed, 27 insertions, 10 deletions
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c index 1ce374c..85dbd4a 100644 --- a/libass/ass_shaper.c +++ b/libass/ass_shaper.c @@ -97,10 +97,10 @@ void ass_shaper_info(ASS_Library *lib) } /** - * \brief grow arrays, if needed + * \brief grow per-codepoint arrays, if needed * \param new_size requested size */ -static bool check_allocations(ASS_Shaper *shaper, size_t new_size, size_t n_pars) +static bool check_codepoint_allocations(ASS_Shaper *shaper, size_t new_size) { if (new_size > shaper->n_codepoints) { if (!ASS_REALLOC_ARRAY(shaper->ctypes, new_size) || @@ -112,6 +112,15 @@ static bool check_allocations(ASS_Shaper *shaper, size_t new_size, size_t n_pars return false; shaper->n_codepoints = new_size; } + return true; +} + +/** + * \brief grow per-bidi-paragraph arrays, if needed + * \param n_pars requested size + */ +static bool check_par_allocations(ASS_Shaper *shaper, size_t n_pars) +{ if (shaper->whole_text_layout && n_pars > shaper->n_pars) { if (!ASS_REALLOC_ARRAY(shaper->pbase_dir, n_pars)) return false; @@ -968,12 +977,7 @@ bool ass_shaper_shape(ASS_Shaper *shaper, TextInfo *text_info) GlyphInfo *glyphs = text_info->glyphs; shaper->event_text = text_info->event_text; - int n_pars = 1; - for (i = 0; i < text_info->length - 1; i++) - if (glyphs[i].symbol == '\n') - n_pars++; - - if (!check_allocations(shaper, text_info->length, n_pars)) + if (!check_codepoint_allocations(shaper, text_info->length)) return false; for (i = 0; i < text_info->length; i++) @@ -982,6 +986,14 @@ bool ass_shaper_shape(ASS_Shaper *shaper, TextInfo *text_info) fribidi_get_bidi_types(shaper->event_text, text_info->length, shaper->ctypes); + int n_pars = 1; + for (i = 0; i < text_info->length - 1; i++) + if (shaper->ctypes[i] == FRIBIDI_TYPE_BS) + n_pars++; + + if (!check_par_allocations(shaper, n_pars)) + return false; + #ifdef USE_FRIBIDI_EX_API if (shaper->bidi_brackets) { fribidi_get_bracket_types(shaper->event_text, @@ -994,7 +1006,8 @@ bool ass_shaper_shape(ASS_Shaper 
*shaper, TextInfo *text_info) pdir = shaper->pbase_dir; for (i = 0; i < text_info->length; i++) { // embedding levels should be calculated paragraph by paragraph - if (glyphs[i].symbol == '\n' || i == text_info->length - 1 || + if (i == text_info->length - 1 || + shaper->ctypes[i] == FRIBIDI_TYPE_BS || (!shaper->whole_text_layout && (glyphs[i + 1].starts_new_run || glyphs[i].hspacing))) { dir = shaper->base_direction; @@ -1091,7 +1104,11 @@ FriBidiStrIndex *ass_shaper_reorder(ASS_Shaper *shaper, TextInfo *text_info) shaper->pbase_dir : &shaper->base_direction; GlyphInfo *glyphs = text_info->glyphs; for (i = 0; i < text_info->length; i++) { + // Bidi "paragraph separators" may occur between line breaks: + // U+001C..1E even with ASS_FEATURE_WRAP_UNICODE, + // or U+000D, U+0085, U+2029 only without it if (i == text_info->length - 1 || glyphs[i + 1].linebreak || + shaper->ctypes[i] == FRIBIDI_TYPE_BS || (!shaper->whole_text_layout && (glyphs[i + 1].starts_new_run || glyphs[i].hspacing))) { ret = fribidi_reorder_line(0, @@ -1102,7 +1119,7 @@ FriBidiStrIndex *ass_shaper_reorder(ASS_Shaper *shaper, TextInfo *text_info) return NULL; last_break = i + 1; - if (shaper->whole_text_layout && glyphs[i].symbol == '\n') + if (shaper->whole_text_layout && shaper->ctypes[i] == FRIBIDI_TYPE_BS) pdir++; } } |