summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Oleg Oshmyan <chortos@inbox.lv> 2023-01-21 18:57:40 +0200
committer: Oleg Oshmyan <chortos@inbox.lv> 2023-02-09 16:17:43 +0200
commit: 5ef685789cce1d48223339709c5e9b80292724d6 (patch)
tree: 2ebf7d902cccbcc7a6c10a4c9737bd79e66c7b6e
parent: 2d263786fade9581fda4e03d5c8f19a870fd89fb (diff)
download: libass-5ef685789cce1d48223339709c5e9b80292724d6.tar.bz2
download: libass-5ef685789cce1d48223339709c5e9b80292724d6.tar.xz
Bidi: take into account all kinds of bidi paragraph separators
Bidi is done one "paragraph" at a time, delimited by code points that have Bidi_Class=Paragraph_Separator in UCD DerivedBidiClass.txt:
https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt

Each bidi "paragraph" has its own (single) base direction, and all explicit directional embeddings, overrides and isolates are always terminated at the end of the paragraph.

FriBidi's text analysis function, fribidi_get_par_embedding_levels..., expects a single paragraph as input. In practice, this means it stops processing explicit overrides etc. when it encounters the nearest paragraph separator.

We gave FriBidi lines of text delimited only by U+000A, so each such line could span multiple bidi paragraphs and produce unexpected output. The fix is to detect all paragraph separators recognized by FriBidi and split paragraphs on them.

We only actually break lines on U+000A (and softly on U+0020) in VSFilter-compatible mode or following the Unicode line breaking algorithm with ASS_FEATURE_WRAP_UNICODE, but even the latter does not, in fact, break lines on all of the code points that the bidirectional algorithm recognizes as "paragraph separators" (U+001C..1E: for line breaking, they are treated as combining marks and effectively merged into the preceding character; note that the line breaking algorithm also has its own, incompatible, definition of a "paragraph"). So it is possible that a bidi "paragraph" starts and ends in the middle of a line, and our reordering loop must stop at *both* line breaks and bidi-paragraph breaks for the output to make any amount of sense.

The observable symptoms fixed by this patch are:

* When any of U+000D, U+001C..1E, U+0085, U+2029 occur in the text, explicit directional overrides etc. work improperly from that point until the next \N. (It seems they act as strong characters themselves, breaking up the surrounding run if appropriate, but they do not establish any embedding for the text contained within.)
* With ASS_FEATURE_WHOLE_TEXT (including Encoding -1), base direction is not resolved separately for text that follows those same code points. This is especially visible with ASS_FEATURE_WRAP_UNICODE, which breaks lines on U+000D, U+0085 and U+2029.

I have verified that the new rendering with the ASS_Features disabled matches VSFilter (GDI/Uniscribe), other than the glyphs that each special character itself is displayed as, which is unrelated to bidi. (For what it is worth, we currently display tofu boxes for all of these special characters, whereas in VSFilter U+000B..C appear as dingbats, the non-ASCII code points seem to intermittently be invisible or display tofu, and I have not tested U+000D.)
-rw-r--r-- libass/ass_shaper.c 37
1 file changed, 27 insertions, 10 deletions
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c
index 1ce374c..85dbd4a 100644
--- a/libass/ass_shaper.c
+++ b/libass/ass_shaper.c
@@ -97,10 +97,10 @@ void ass_shaper_info(ASS_Library *lib)
}
/**
- * \brief grow arrays, if needed
+ * \brief grow per-codepoint arrays, if needed
* \param new_size requested size
*/
-static bool check_allocations(ASS_Shaper *shaper, size_t new_size, size_t n_pars)
+static bool check_codepoint_allocations(ASS_Shaper *shaper, size_t new_size)
{
if (new_size > shaper->n_codepoints) {
if (!ASS_REALLOC_ARRAY(shaper->ctypes, new_size) ||
@@ -112,6 +112,15 @@ static bool check_allocations(ASS_Shaper *shaper, size_t new_size, size_t n_pars
return false;
shaper->n_codepoints = new_size;
}
+ return true;
+}
+
+/**
+ * \brief grow per-bidi-paragraph arrays, if needed
+ * \param n_pars requested size
+ */
+static bool check_par_allocations(ASS_Shaper *shaper, size_t n_pars)
+{
if (shaper->whole_text_layout && n_pars > shaper->n_pars) {
if (!ASS_REALLOC_ARRAY(shaper->pbase_dir, n_pars))
return false;
@@ -968,12 +977,7 @@ bool ass_shaper_shape(ASS_Shaper *shaper, TextInfo *text_info)
GlyphInfo *glyphs = text_info->glyphs;
shaper->event_text = text_info->event_text;
- int n_pars = 1;
- for (i = 0; i < text_info->length - 1; i++)
- if (glyphs[i].symbol == '\n')
- n_pars++;
-
- if (!check_allocations(shaper, text_info->length, n_pars))
+ if (!check_codepoint_allocations(shaper, text_info->length))
return false;
for (i = 0; i < text_info->length; i++)
@@ -982,6 +986,14 @@ bool ass_shaper_shape(ASS_Shaper *shaper, TextInfo *text_info)
fribidi_get_bidi_types(shaper->event_text,
text_info->length, shaper->ctypes);
+ int n_pars = 1;
+ for (i = 0; i < text_info->length - 1; i++)
+ if (shaper->ctypes[i] == FRIBIDI_TYPE_BS)
+ n_pars++;
+
+ if (!check_par_allocations(shaper, n_pars))
+ return false;
+
#ifdef USE_FRIBIDI_EX_API
if (shaper->bidi_brackets) {
fribidi_get_bracket_types(shaper->event_text,
@@ -994,7 +1006,8 @@ bool ass_shaper_shape(ASS_Shaper *shaper, TextInfo *text_info)
pdir = shaper->pbase_dir;
for (i = 0; i < text_info->length; i++) {
// embedding levels should be calculated paragraph by paragraph
- if (glyphs[i].symbol == '\n' || i == text_info->length - 1 ||
+ if (i == text_info->length - 1 ||
+ shaper->ctypes[i] == FRIBIDI_TYPE_BS ||
(!shaper->whole_text_layout &&
(glyphs[i + 1].starts_new_run || glyphs[i].hspacing))) {
dir = shaper->base_direction;
@@ -1091,7 +1104,11 @@ FriBidiStrIndex *ass_shaper_reorder(ASS_Shaper *shaper, TextInfo *text_info)
shaper->pbase_dir : &shaper->base_direction;
GlyphInfo *glyphs = text_info->glyphs;
for (i = 0; i < text_info->length; i++) {
+ // Bidi "paragraph separators" may occur between line breaks:
+ // U+001C..1E even with ASS_FEATURE_WRAP_UNICODE,
+ // or U+000D, U+0085, U+2029 only without it
if (i == text_info->length - 1 || glyphs[i + 1].linebreak ||
+ shaper->ctypes[i] == FRIBIDI_TYPE_BS ||
(!shaper->whole_text_layout &&
(glyphs[i + 1].starts_new_run || glyphs[i].hspacing))) {
ret = fribidi_reorder_line(0,
@@ -1102,7 +1119,7 @@ FriBidiStrIndex *ass_shaper_reorder(ASS_Shaper *shaper, TextInfo *text_info)
return NULL;
last_break = i + 1;
- if (shaper->whole_text_layout && glyphs[i].symbol == '\n')
+ if (shaper->whole_text_layout && shaper->ctypes[i] == FRIBIDI_TYPE_BS)
pdir++;
}
}