summaryrefslogtreecommitdiffstats
path: root/libass/ass_shaper.c
diff options
context:
space:
mode:
Diffstat (limited to 'libass/ass_shaper.c')
-rw-r--r--libass/ass_shaper.c174
1 files changed, 170 insertions, 4 deletions
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c
index 6ffc083..87b6c6d 100644
--- a/libass/ass_shaper.c
+++ b/libass/ass_shaper.c
@@ -412,6 +412,107 @@ static hb_font_t *get_hb_font(ASS_Shaper *shaper, GlyphInfo *info)
}
/**
+ * \brief Look up the default language for a script.
+ *
+ * Scripts that have a representative language they are typically used
+ * with are mapped to that language's tag. Every other script yields the
+ * invalid language.
+ *
+ * The mapping is similar to Pango's pango-language.c.
+ *
+ * \param script script tag
+ * \return language tag, or HB_LANGUAGE_INVALID if the script has no
+ * representative language
+ */
+static hb_language_t script_to_language(hb_script_t script)
+{
+    const char *tag;
+
+    // select the language code only; it is converted once, below
+    switch (script) {
+    // Unicode 1.1
+    case HB_SCRIPT_ARABIC:              tag = "ar";  break;
+    case HB_SCRIPT_ARMENIAN:            tag = "hy";  break;
+    case HB_SCRIPT_BENGALI:             tag = "bn";  break;
+    case HB_SCRIPT_CANADIAN_ABORIGINAL: tag = "iu";  break;
+    case HB_SCRIPT_CHEROKEE:            tag = "chr"; break;
+    case HB_SCRIPT_COPTIC:              tag = "cop"; break;
+    case HB_SCRIPT_CYRILLIC:            tag = "ru";  break;
+    case HB_SCRIPT_DEVANAGARI:          tag = "hi";  break;
+    case HB_SCRIPT_GEORGIAN:            tag = "ka";  break;
+    case HB_SCRIPT_GREEK:               tag = "el";  break;
+    case HB_SCRIPT_GUJARATI:            tag = "gu";  break;
+    case HB_SCRIPT_GURMUKHI:            tag = "pa";  break;
+    case HB_SCRIPT_HANGUL:              tag = "ko";  break;
+    case HB_SCRIPT_HEBREW:              tag = "he";  break;
+    case HB_SCRIPT_HIRAGANA:            tag = "ja";  break;
+    case HB_SCRIPT_KANNADA:             tag = "kn";  break;
+    case HB_SCRIPT_KATAKANA:            tag = "ja";  break;
+    case HB_SCRIPT_LAO:                 tag = "lo";  break;
+    case HB_SCRIPT_LATIN:               tag = "en";  break;
+    case HB_SCRIPT_MALAYALAM:           tag = "ml";  break;
+    case HB_SCRIPT_MONGOLIAN:           tag = "mn";  break;
+    case HB_SCRIPT_ORIYA:               tag = "or";  break;
+    case HB_SCRIPT_SYRIAC:              tag = "syr"; break;
+    case HB_SCRIPT_TAMIL:               tag = "ta";  break;
+    case HB_SCRIPT_TELUGU:              tag = "te";  break;
+    case HB_SCRIPT_THAI:                tag = "th";  break;
+
+    // Unicode 2.0
+    case HB_SCRIPT_TIBETAN:             tag = "bo";  break;
+
+    // Unicode 3.0
+    case HB_SCRIPT_ETHIOPIC:            tag = "am";  break;
+    case HB_SCRIPT_KHMER:               tag = "km";  break;
+    case HB_SCRIPT_MYANMAR:             tag = "my";  break;
+    case HB_SCRIPT_SINHALA:             tag = "si";  break;
+    case HB_SCRIPT_THAANA:              tag = "dv";  break;
+
+    // Unicode 3.2
+    case HB_SCRIPT_BUHID:               tag = "bku"; break;
+    case HB_SCRIPT_HANUNOO:             tag = "hnn"; break;
+    case HB_SCRIPT_TAGALOG:             tag = "tl";  break;
+    case HB_SCRIPT_TAGBANWA:            tag = "tbw"; break;
+
+    // Unicode 4.0
+    case HB_SCRIPT_UGARITIC:            tag = "uga"; break;
+
+    // Unicode 4.1
+    case HB_SCRIPT_BUGINESE:            tag = "bug"; break;
+    case HB_SCRIPT_OLD_PERSIAN:         tag = "peo"; break;
+    case HB_SCRIPT_SYLOTI_NAGRI:        tag = "syl"; break;
+
+    // Unicode 5.0
+    case HB_SCRIPT_NKO:                 tag = "nko"; break;
+
+    // no representative language exists
+    default:
+        return HB_LANGUAGE_INVALID;
+    }
+
+    return hb_language_from_string(tag, -1);
+}
+
+/**
+ * \brief Pick the language used for shaping a run.
+ *
+ * A language override stored in the shaper always wins. Without one, the
+ * default language of the run's script is used; if the script has no
+ * default language, the result of hb_language_get_default() is returned.
+ *
+ * \param shaper shaper instance
+ * \param script script tag associated with run
+ * \return language tag
+ */
+static hb_language_t
+hb_shaper_get_run_language(ASS_Shaper *shaper, hb_script_t script)
+{
+    // an override stored in the shaper takes precedence
+    if (shaper->language != HB_LANGUAGE_INVALID)
+        return shaper->language;
+
+    // next choice: the default language for the given script
+    hb_language_t by_script = script_to_language(script);
+    if (by_script != HB_LANGUAGE_INVALID)
+        return by_script;
+
+    // no dice, use system default
+    return hb_language_get_default();
+}
+
+/**
* \brief Shape event text with HarfBuzz. Full OpenType shaping.
* \param glyphs glyph clusters
* \param len number of clusters
@@ -433,6 +534,7 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
int k = i;
int level = glyphs[i].shape_run_id;
int direction = shaper->emblevels[k] % 2;
+ hb_script_t script = glyphs[i].script;
while (i < (len - 1) && level == glyphs[i+1].shape_run_id)
i++;
runs[run].offset = k;
@@ -443,7 +545,9 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
hb_buffer_pre_allocate(runs[run].buf, i - k + 1);
hb_buffer_set_direction(runs[run].buf, direction ? HB_DIRECTION_RTL :
HB_DIRECTION_LTR);
- hb_buffer_set_language(runs[run].buf, shaper->language);
+ hb_buffer_set_language(runs[run].buf,
+ hb_shaper_get_run_language(shaper, script));
+ hb_buffer_set_script(runs[run].buf, script);
hb_buffer_add_utf32(runs[run].buf, shaper->event_text + k, i - k + 1,
0, i - k + 1);
hb_shape(runs[run].font, runs[run].buf, shaper->features,
@@ -496,6 +600,56 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
}
}
+
+/**
+ * \brief Determine script property of all characters. Characters of script
+ * common and inherited get their script from their context.
+ *
+ * A forward scan assigns every character its Unicode script and lets
+ * common/inherited characters take over the script of the closest
+ * preceding character that has a known one. If any common/inherited
+ * character had no such predecessor, a backwards scan gives the remaining
+ * ones the script of the closest following character instead.
+ * Characters without any usable context keep their common/inherited
+ * script.
+ *
+ * \param shaper shaper instance (not referenced by this function)
+ * \param glyphs glyph clusters; the script field of each one is written
+ * \param len number of clusters
+ */
+void ass_shaper_determine_script(ASS_Shaper *shaper, GlyphInfo *glyphs,
+                                 size_t len)
+{
+    // size_t index: matches the type of len, so there is no signed/unsigned
+    // comparison and no narrowing of len to int
+    size_t i;
+    int backwards_scan = 0;
+    hb_unicode_funcs_t *ufuncs = hb_unicode_funcs_get_default();
+    hb_script_t last_script = HB_SCRIPT_UNKNOWN;
+
+    // determine script (forward scan)
+    for (i = 0; i < len; i++) {
+        GlyphInfo *info = glyphs + i;
+        info->script = hb_unicode_script(ufuncs, info->symbol);
+
+        // common/inherit codepoints inherit script from context
+        if (info->script == HB_SCRIPT_COMMON ||
+            info->script == HB_SCRIPT_INHERITED) {
+            // unknown is not a valid context
+            if (last_script != HB_SCRIPT_UNKNOWN)
+                info->script = last_script;
+            else
+                // do a backwards scan to check if next codepoint
+                // contains a valid script for context
+                backwards_scan = 1;
+        } else {
+            last_script = info->script;
+        }
+    }
+
+    // every common/inherited codepoint found its context going forward
+    if (!backwards_scan)
+        return;
+
+    // determine script (backwards scan)
+    last_script = HB_SCRIPT_UNKNOWN;
+    // counts down from len - 1 to 0 without ever wrapping below zero;
+    // the body does not run at all when len is 0
+    for (i = len; i-- > 0; ) {
+        GlyphInfo *info = glyphs + i;
+
+        // common/inherit codepoints inherit script from context
+        if (info->script == HB_SCRIPT_COMMON ||
+            info->script == HB_SCRIPT_INHERITED) {
+            // unknown script is not a valid context
+            if (last_script != HB_SCRIPT_UNKNOWN)
+                info->script = last_script;
+        } else {
+            last_script = info->script;
+        }
+    }
+}
#endif
/**
@@ -546,6 +700,11 @@ void ass_shaper_find_runs(ASS_Shaper *shaper, ASS_Renderer *render_priv,
int i;
int shape_run = 0;
+#ifdef CONFIG_HARFBUZZ
+ ass_shaper_determine_script(shaper, glyphs, len);
+#endif
+
+ // find appropriate fonts for the shape runs
for (i = 0; i < len; i++) {
GlyphInfo *last = glyphs + i - 1;
GlyphInfo *info = glyphs + i;
@@ -558,11 +717,11 @@ void ass_shaper_find_runs(ASS_Shaper *shaper, ASS_Renderer *render_priv,
// shape runs share the same font face and size
if (i > 0 && (last->font != info->font ||
last->font_size != info->font_size ||
- last->face_index != info->face_index))
+ last->face_index != info->face_index ||
+ last->script != info->script))
shape_run++;
info->shape_run_id = shape_run;
}
-
}
/**
@@ -582,7 +741,14 @@ void ass_shaper_set_base_direction(ASS_Shaper *shaper, FriBidiParType dir)
void ass_shaper_set_language(ASS_Shaper *shaper, const char *code)
{
#ifdef CONFIG_HARFBUZZ
- shaper->language = hb_language_from_string(code, -1);
+    // a NULL code means "no language": store the invalid language tag
+    shaper->language = code ? hb_language_from_string(code, -1)
+                            : HB_LANGUAGE_INVALID;
#endif
}