summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Grigori Goronzy <greg@blackbox> 2013-03-04 20:24:35 +0100
committer Grigori Goronzy <greg@blackbox> 2013-03-04 20:52:21 +0100
commit fc3b05f3178a88e5af1e994d91e43fdb0fda1059 (patch)
tree e7a8ac498f28d8645564a09d7c383e3c5d03b204
parent 256df617e227af2ed7274f4281756ec38ad30a70 (diff)
download libass-fc3b05f3178a88e5af1e994d91e43fdb0fda1059.tar.bz2
libass-fc3b05f3178a88e5af1e994d91e43fdb0fda1059.tar.xz
shaper: proper script/language handling
Determine the script for each character and use this as an additional property for splitting up the text into runs. Characters of Common or Inherited script assume the script of the preceding character. If that is not possible (for instance, when the first character(s) in a run are Common/Inherited), a backwards scan is done so they can assume the script of the following character. Additionally, determine a default language in case no override is set. This simply maps a script to a language, if a language exists that is mostly representative for a given script. Pango's mapping has been adapted. This helps with fonts that don't have OpenType features set up for default script/language pairs. It's also considered to be the right approach by most people, and might help with correct OpenType rendering in some other cases. Fixes issue 85.
-rw-r--r-- libass/ass_render.c 3
-rw-r--r-- libass/ass_render.h 8
-rw-r--r-- libass/ass_shaper.c 174
3 files changed, 179 insertions, 6 deletions
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 6681612..a580b29 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -2256,8 +2256,7 @@ ass_start_frame(ASS_Renderer *render_priv, ASS_Track *track,
render_priv->border_scale *= settings_priv->font_size_coeff;
ass_shaper_set_kerning(render_priv->shaper, track->Kerning);
- if (track->Language)
- ass_shaper_set_language(render_priv->shaper, track->Language);
+ ass_shaper_set_language(render_priv->shaper, track->Language);
ass_shaper_set_level(render_priv->shaper, render_priv->settings.shaper);
// PAR correction
diff --git a/libass/ass_render.h b/libass/ass_render.h
index 8feeadc..53343fd 100644
--- a/libass/ass_render.h
+++ b/libass/ass_render.h
@@ -26,6 +26,9 @@
#include FT_STROKER_H
#include FT_GLYPH_H
#include FT_SYNTHESIS_H
+#ifdef CONFIG_HARFBUZZ
+#include "hb.h"
+#endif
// XXX: fix the inclusion mess so we can avoid doing this here
typedef struct ass_shaper ASS_Shaper;
@@ -109,6 +112,11 @@ typedef struct glyph_info {
ASS_Font *font;
int face_index;
int glyph_index;
+#ifdef CONFIG_HARFBUZZ
+ hb_script_t script;
+#else
+ int script;
+#endif
double font_size;
ASS_Drawing *drawing;
FT_Outline *outline;
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c
index 6ffc083..87b6c6d 100644
--- a/libass/ass_shaper.c
+++ b/libass/ass_shaper.c
@@ -412,6 +412,107 @@ static hb_font_t *get_hb_font(ASS_Shaper *shaper, GlyphInfo *info)
}
/**
+ * \brief Map script to default language.
+ *
+ * This maps a script to a language, if the script has a representative
+ * language it is typically used with (e.g. "ru" for Cyrillic). Otherwise,
+ * HB_LANGUAGE_INVALID is returned.
+ *
+ * The mapping is similar to Pango's pango-language.c. The "break" after
+ * each "return" below is never reached.
+ * \param script script tag
+ * \return language tag, or HB_LANGUAGE_INVALID if the script has no entry
+ */
+static hb_language_t script_to_language(hb_script_t script)
+{
+ switch (script) {
+ // Unicode 1.1
+ case HB_SCRIPT_ARABIC: return hb_language_from_string("ar", -1); break;
+ case HB_SCRIPT_ARMENIAN: return hb_language_from_string("hy", -1); break;
+ case HB_SCRIPT_BENGALI: return hb_language_from_string("bn", -1); break;
+ case HB_SCRIPT_CANADIAN_ABORIGINAL: return hb_language_from_string("iu", -1); break;
+ case HB_SCRIPT_CHEROKEE: return hb_language_from_string("chr", -1); break;
+ case HB_SCRIPT_COPTIC: return hb_language_from_string("cop", -1); break;
+ case HB_SCRIPT_CYRILLIC: return hb_language_from_string("ru", -1); break;
+ case HB_SCRIPT_DEVANAGARI: return hb_language_from_string("hi", -1); break;
+ case HB_SCRIPT_GEORGIAN: return hb_language_from_string("ka", -1); break;
+ case HB_SCRIPT_GREEK: return hb_language_from_string("el", -1); break;
+ case HB_SCRIPT_GUJARATI: return hb_language_from_string("gu", -1); break;
+ case HB_SCRIPT_GURMUKHI: return hb_language_from_string("pa", -1); break;
+ case HB_SCRIPT_HANGUL: return hb_language_from_string("ko", -1); break;
+ case HB_SCRIPT_HEBREW: return hb_language_from_string("he", -1); break;
+ case HB_SCRIPT_HIRAGANA: return hb_language_from_string("ja", -1); break;
+ case HB_SCRIPT_KANNADA: return hb_language_from_string("kn", -1); break;
+ case HB_SCRIPT_KATAKANA: return hb_language_from_string("ja", -1); break;
+ case HB_SCRIPT_LAO: return hb_language_from_string("lo", -1); break;
+ case HB_SCRIPT_LATIN: return hb_language_from_string("en", -1); break;
+ case HB_SCRIPT_MALAYALAM: return hb_language_from_string("ml", -1); break;
+ case HB_SCRIPT_MONGOLIAN: return hb_language_from_string("mn", -1); break;
+ case HB_SCRIPT_ORIYA: return hb_language_from_string("or", -1); break;
+ case HB_SCRIPT_SYRIAC: return hb_language_from_string("syr", -1); break;
+ case HB_SCRIPT_TAMIL: return hb_language_from_string("ta", -1); break;
+ case HB_SCRIPT_TELUGU: return hb_language_from_string("te", -1); break;
+ case HB_SCRIPT_THAI: return hb_language_from_string("th", -1); break;
+
+ // Unicode 2.0
+ case HB_SCRIPT_TIBETAN: return hb_language_from_string("bo", -1); break;
+
+ // Unicode 3.0
+ case HB_SCRIPT_ETHIOPIC: return hb_language_from_string("am", -1); break;
+ case HB_SCRIPT_KHMER: return hb_language_from_string("km", -1); break;
+ case HB_SCRIPT_MYANMAR: return hb_language_from_string("my", -1); break;
+ case HB_SCRIPT_SINHALA: return hb_language_from_string("si", -1); break;
+ case HB_SCRIPT_THAANA: return hb_language_from_string("dv", -1); break;
+
+ // Unicode 3.2
+ case HB_SCRIPT_BUHID: return hb_language_from_string("bku", -1); break;
+ case HB_SCRIPT_HANUNOO: return hb_language_from_string("hnn", -1); break;
+ case HB_SCRIPT_TAGALOG: return hb_language_from_string("tl", -1); break;
+ case HB_SCRIPT_TAGBANWA: return hb_language_from_string("tbw", -1); break;
+
+ // Unicode 4.0
+ case HB_SCRIPT_UGARITIC: return hb_language_from_string("uga", -1); break;
+
+ // Unicode 4.1
+ case HB_SCRIPT_BUGINESE: return hb_language_from_string("bug", -1); break;
+ case HB_SCRIPT_OLD_PERSIAN: return hb_language_from_string("peo", -1); break;
+ case HB_SCRIPT_SYLOTI_NAGRI: return hb_language_from_string("syl", -1); break;
+
+ // Unicode 5.0
+ case HB_SCRIPT_NKO: return hb_language_from_string("nko", -1); break;
+
+ // no representative language exists for any other script
+ default: return HB_LANGUAGE_INVALID; break;
+ }
+}
+
+/**
+ * \brief Determine language to be used for shaping a run.
+ * Order: shaper->language override, script default, HarfBuzz default.
+ * \param shaper shaper instance; only its language field is read
+ * \param script script tag associated with run
+ * \return language tag chosen by the order above
+ */
+static hb_language_t
+hb_shaper_get_run_language(ASS_Shaper *shaper, hb_script_t script)
+{
+ hb_language_t lang;
+
+ // an override is set (anything but HB_LANGUAGE_INVALID), use it
+ if (shaper->language != HB_LANGUAGE_INVALID)
+ return shaper->language;
+
+ // get default language for given script
+ lang = script_to_language(script);
+
+ // script has no default language, use hb_language_get_default()
+ if (lang == HB_LANGUAGE_INVALID)
+ lang = hb_language_get_default();
+
+ return lang;
+}
+
+/**
* \brief Shape event text with HarfBuzz. Full OpenType shaping.
* \param glyphs glyph clusters
* \param len number of clusters
@@ -433,6 +534,7 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
int k = i;
int level = glyphs[i].shape_run_id;
int direction = shaper->emblevels[k] % 2;
+ hb_script_t script = glyphs[i].script;
while (i < (len - 1) && level == glyphs[i+1].shape_run_id)
i++;
runs[run].offset = k;
@@ -443,7 +545,9 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
hb_buffer_pre_allocate(runs[run].buf, i - k + 1);
hb_buffer_set_direction(runs[run].buf, direction ? HB_DIRECTION_RTL :
HB_DIRECTION_LTR);
- hb_buffer_set_language(runs[run].buf, shaper->language);
+ hb_buffer_set_language(runs[run].buf,
+ hb_shaper_get_run_language(shaper, script));
+ hb_buffer_set_script(runs[run].buf, script);
hb_buffer_add_utf32(runs[run].buf, shaper->event_text + k, i - k + 1,
0, i - k + 1);
hb_shape(runs[run].font, runs[run].buf, shaper->features,
@@ -496,6 +600,56 @@ static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
}
}
+
+/**
+ * \brief Determine script property of all characters. Characters of script
+ * common and inherited get their script from their context: the preceding
+ * character or, failing that, the following one. "shaper" is not used here.
+ */
+void ass_shaper_determine_script(ASS_Shaper *shaper, GlyphInfo *glyphs,
+ size_t len)
+{
+ int i; // NOTE(review): int index is compared against size_t len below
+ int backwards_scan = 0; // set when the forward scan found no context
+ hb_unicode_funcs_t *ufuncs = hb_unicode_funcs_get_default();
+ hb_script_t last_script = HB_SCRIPT_UNKNOWN;
+
+ // determine script (forward scan)
+ for (i = 0; i < len; i++) {
+ GlyphInfo *info = glyphs + i;
+ info->script = hb_unicode_script(ufuncs, info->symbol);
+
+ // common/inherit codepoints inherit script from context
+ if (info->script == HB_SCRIPT_COMMON ||
+ info->script == HB_SCRIPT_INHERITED) {
+ // unknown is not a valid context
+ if (last_script != HB_SCRIPT_UNKNOWN)
+ info->script = last_script;
+ else
+ // do a backwards scan to check if next codepoint
+ // contains a valid script for context
+ backwards_scan = 1;
+ } else {
+ last_script = info->script;
+ }
+ }
+
+ // determine script (backwards scan, skipped entirely if not needed)
+ last_script = HB_SCRIPT_UNKNOWN;
+ for (i = len - 1; i >= 0 && backwards_scan; i--) {
+ GlyphInfo *info = glyphs + i;
+
+ // common/inherit codepoints inherit script from context
+ if (info->script == HB_SCRIPT_COMMON ||
+ info->script == HB_SCRIPT_INHERITED) {
+ // unknown script is not a valid context; glyph is left unchanged
+ if (last_script != HB_SCRIPT_UNKNOWN)
+ info->script = last_script;
+ } else {
+ last_script = info->script;
+ }
+ }
+}
#endif
/**
@@ -546,6 +700,11 @@ void ass_shaper_find_runs(ASS_Shaper *shaper, ASS_Renderer *render_priv,
int i;
int shape_run = 0;
+#ifdef CONFIG_HARFBUZZ
+ ass_shaper_determine_script(shaper, glyphs, len);
+#endif
+
+ // find appropriate fonts for the shape runs
for (i = 0; i < len; i++) {
GlyphInfo *last = glyphs + i - 1;
GlyphInfo *info = glyphs + i;
@@ -558,11 +717,11 @@ void ass_shaper_find_runs(ASS_Shaper *shaper, ASS_Renderer *render_priv,
// shape runs share the same font face and size
if (i > 0 && (last->font != info->font ||
last->font_size != info->font_size ||
- last->face_index != info->face_index))
+ last->face_index != info->face_index ||
+ last->script != info->script))
shape_run++;
info->shape_run_id = shape_run;
}
-
}
/**
@@ -582,7 +741,14 @@ void ass_shaper_set_base_direction(ASS_Shaper *shaper, FriBidiParType dir)
void ass_shaper_set_language(ASS_Shaper *shaper, const char *code)
{
#ifdef CONFIG_HARFBUZZ
- shaper->language = hb_language_from_string(code, -1);
+ hb_language_t lang;
+
+ if (code) // a NULL code means "no language override"
+ lang = hb_language_from_string(code, -1);
+ else
+ lang = HB_LANGUAGE_INVALID; // hb_shaper_get_run_language() then picks a language per run
+
+ shaper->language = lang;
#endif
}