summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOleg Oshmyan <chortos@inbox.lv>2021-09-18 17:05:50 +0300
committerOleg Oshmyan <chortos@inbox.lv>2021-10-19 23:54:03 +0300
commit3f9b63469764a9211e20381d974f449ddc5dac8f (patch)
treeb68b61c2dd79a82a387c2a40371a13f4c39dc935
parent3b6e3d86b75b57b5242762379c0f2e919af0b025 (diff)
downloadlibass-3f9b63469764a9211e20381d974f449ddc5dac8f.tar.bz2
libass-3f9b63469764a9211e20381d974f449ddc5dac8f.tar.xz
Pass ZWJ/ZWNJ to HarfBuzz across shape run boundaries
ZWJ and the characters next to it may be separated into different shape runs by bidi, script or font-fallback splitting. Nevertheless, ZWJ must affect Arabic-script characters on both sides of itself. This is noted in the description of the Unicode bidirectional algorithm [TR9-Joiners] and matches the observed behavior of VSFilter (GDI/Uniscribe).

Fixes https://github.com/libass/libass/issues/545.

We could more easily pass the whole event text to HarfBuzz rather than single out ZWJ and ZWNJ, but that would produce results different from VSFilter's: it shapes each high-level run in isolation; and in the rare case that Arabic text is set in a font that is missing some glyphs, GDI/Uniscribe apply font fallback and choose isolated/initial/final letter forms for each section separately, without joining the letters from different fonts together (which HarfBuzz would do if we gave it the full surrounding context), unless an explicit ZWJ is placed in between.

Unlike ZWJ, I have not been able to produce a test for ZWNJ that would confirm whether VSFilter does this for ZWNJ as well. It should not matter for Arabic as the boundaries are non-joining by default, but it might make a difference for Indic scripts or others. It seems to make sense, so in the absence of evidence to the contrary, this commit applies the same treatment to ZWNJ as well.

[TR9-Joiners]: https://unicode.org/reports/tr9/#Joiners
-rw-r--r--libass/ass_shaper.c25
1 files changed, 22 insertions, 3 deletions
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c
index c4a157f..e406fd1 100644
--- a/libass/ass_shaper.c
+++ b/libass/ass_shaper.c
@@ -501,6 +501,15 @@ static hb_font_t *get_hb_font(ASS_Shaper *shaper, GlyphInfo *info)
}
/**
+ * \brief Determine whether this Unicode codepoint affects shaping
+ * of neighbors even if they are in separate shape runs due to bidi,
+ * script or font splitting, using VSFilter as the reference.
+ */
+static inline bool is_shaping_control(unsigned symbol) {
+ return symbol == 0x200C /* ZWNJ */ || symbol == 0x200D /* ZWJ */;
+}
+
+/**
* \brief Map script to default language.
*
* This maps a script to a language, if a script has a representative
@@ -682,9 +691,19 @@ static bool shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
level == shaper->emblevels[i + 1])
i++;
+ int lead_context = 0, trail_context = 0;
+ if (offset > 0 && !glyphs[offset].starts_new_run &&
+ is_shaping_control(glyphs[offset - 1].symbol))
+ lead_context = 1;
+ if (i < (len - 1) && !glyphs[i + 1].starts_new_run &&
+ is_shaping_control(glyphs[i + 1].symbol))
+ trail_context = 1;
+
hb_buffer_pre_allocate(buf, i - offset + 1);
- hb_buffer_add_utf32(buf, shaper->event_text + offset, i - offset + 1,
- 0, i - offset + 1);
+ hb_buffer_add_utf32(buf,
+ shaper->event_text + offset - lead_context,
+ i - offset + 1 + lead_context + trail_context,
+ lead_context, i - offset + 1);
props.direction = FRIBIDI_LEVEL_IS_RTL(level) ?
HB_DIRECTION_RTL : HB_DIRECTION_LTR;
@@ -695,7 +714,7 @@ static bool shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
set_run_features(shaper, glyphs + offset);
hb_shape(font, buf, shaper->features, shaper->n_features);
- shape_harfbuzz_process_run(glyphs, buf, offset);
+ shape_harfbuzz_process_run(glyphs, buf, offset - lead_context);
hb_buffer_reset(buf);
}