summaryrefslogtreecommitdiffstats
path: root/misc
diff options
context:
space:
mode:
authorKacper Michajłow <kasper93@gmail.com>2024-04-17 18:42:15 +0200
committerKacper Michajłow <kasper93@gmail.com>2024-05-09 17:12:55 +0200
commit5009e134313bd7b978f1c56d5d94a77ab53ac85b (patch)
tree77ee5e50551f05d21091479333f11fc239611f40 /misc
parent40ba63405fe732c62a8d43fa6ca3f7a8c7824d4b (diff)
downloadmpv-5009e134313bd7b978f1c56d5d94a77ab53ac85b.tar.bz2
mpv-5009e134313bd7b978f1c56d5d94a77ab53ac85b.tar.xz
player/loadfile: match language and subcodes
Diffstat (limited to 'misc')
-rw-r--r--misc/language.c295
-rw-r--r--misc/language.h2
2 files changed, 297 insertions, 0 deletions
diff --git a/misc/language.c b/misc/language.c
new file mode 100644
index 0000000000..e85100a202
--- /dev/null
+++ b/misc/language.c
@@ -0,0 +1,295 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "language.h"
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "common/common.h"
+#include "misc/bstr.h"
+
+static const struct lang {
+ char match[4];
+ char canonical[4];
+} langmap[] = {
+ {"aa", "aar"},
+ {"ab", "abk"},
+ {"ae", "ave"},
+ {"af", "afr"},
+ {"ak", "aka"},
+ {"am", "amh"},
+ {"an", "arg"},
+ {"ar", "ara"},
+ {"as", "asm"},
+ {"av", "ava"},
+ {"ay", "aym"},
+ {"az", "aze"},
+ {"ba", "bak"},
+ {"be", "bel"},
+ {"bg", "bul"},
+ {"bh", "bih"},
+ {"bi", "bis"},
+ {"bm", "bam"},
+ {"bn", "ben"},
+ {"bo", "tib"},
+ {"bod", "tib"},
+ {"br", "bre"},
+ {"bs", "bos"},
+ {"ca", "cat"},
+ {"ce", "che"},
+ {"ces", "cze"},
+ {"ch", "cha"},
+ {"co", "cos"},
+ {"cr", "cre"},
+ {"cs", "cze"},
+ {"cu", "chu"},
+ {"cv", "chv"},
+ {"cy", "wel"},
+ {"cym", "wel"},
+ {"da", "dan"},
+ {"de", "ger"},
+ {"deu", "ger"},
+ {"dv", "div"},
+ {"dz", "dzo"},
+ {"ee", "ewe"},
+ {"el", "gre"},
+ {"ell", "gre"},
+ {"en", "eng"},
+ {"eo", "epo"},
+ {"es", "spa"},
+ {"et", "est"},
+ {"eu", "baq"},
+ {"eus", "baq"},
+ {"fa", "per"},
+ {"fas", "per"},
+ {"ff", "ful"},
+ {"fi", "fin"},
+ {"fj", "fij"},
+ {"fo", "fao"},
+ {"fr", "fre"},
+ {"fra", "fre"},
+ {"fy", "fry"},
+ {"ga", "gle"},
+ {"gd", "gla"},
+ {"gl", "glg"},
+ {"gn", "grn"},
+ {"gu", "guj"},
+ {"gv", "glv"},
+ {"ha", "hau"},
+ {"he", "heb"},
+ {"hi", "hin"},
+ {"ho", "hmo"},
+ {"hr", "hrv"},
+ {"ht", "hat"},
+ {"hu", "hun"},
+ {"hy", "arm"},
+ {"hye", "arm"},
+ {"hz", "her"},
+ {"ia", "ina"},
+ {"id", "ind"},
+ {"ie", "ile"},
+ {"ig", "ibo"},
+ {"ii", "iii"},
+ {"ik", "ipk"},
+ {"io", "ido"},
+ {"is", "ice"},
+ {"isl", "ice"},
+ {"it", "ita"},
+ {"iu", "iku"},
+ {"ja", "jpn"},
+ {"jv", "jav"},
+ {"ka", "geo"},
+ {"kat", "geo"},
+ {"kg", "kon"},
+ {"ki", "kik"},
+ {"kj", "kua"},
+ {"kk", "kaz"},
+ {"kl", "kal"},
+ {"km", "khm"},
+ {"kn", "kan"},
+ {"ko", "kor"},
+ {"kr", "kau"},
+ {"ks", "kas"},
+ {"ku", "kur"},
+ {"kv", "kom"},
+ {"kw", "cor"},
+ {"ky", "kir"},
+ {"la", "lat"},
+ {"lb", "ltz"},
+ {"lg", "lug"},
+ {"li", "lim"},
+ {"ln", "lin"},
+ {"lo", "lao"},
+ {"lt", "lit"},
+ {"lu", "lub"},
+ {"lv", "lav"},
+ {"mg", "mlg"},
+ {"mh", "mah"},
+ {"mi", "mao"},
+ {"mk", "mac"},
+ {"mkd", "mac"},
+ {"ml", "mal"},
+ {"mn", "mon"},
+ {"mr", "mar"},
+ {"mri", "mao"},
+ {"ms", "may"},
+ {"msa", "may"},
+ {"mt", "mlt"},
+ {"my", "bur"},
+ {"mya", "bur"},
+ {"na", "nau"},
+ {"nb", "nob"},
+ {"nd", "nde"},
+ {"ne", "nep"},
+ {"ng", "ndo"},
+ {"nl", "dut"},
+ {"nld", "dut"},
+ {"nn", "nno"},
+ {"no", "nor"},
+ {"nr", "nbl"},
+ {"nv", "nav"},
+ {"ny", "nya"},
+ {"oc", "oci"},
+ {"oj", "oji"},
+ {"om", "orm"},
+ {"or", "ori"},
+ {"os", "oss"},
+ {"pa", "pan"},
+ {"pi", "pli"},
+ {"pl", "pol"},
+ {"ps", "pus"},
+ {"pt", "por"},
+ {"qu", "que"},
+ {"rm", "roh"},
+ {"rn", "run"},
+ {"ro", "rum"},
+ {"ron", "rum"},
+ {"ru", "rus"},
+ {"rw", "kin"},
+ {"sa", "san"},
+ {"sc", "srd"},
+ {"sd", "snd"},
+ {"se", "sme"},
+ {"sg", "sag"},
+ {"si", "sin"},
+ {"sk", "slo"},
+ {"sl", "slv"},
+ {"slk", "slo"},
+ {"sm", "smo"},
+ {"sn", "sna"},
+ {"so", "som"},
+ {"sq", "alb"},
+ {"sqi", "alb"},
+ {"sr", "srp"},
+ {"ss", "ssw"},
+ {"st", "sot"},
+ {"su", "sun"},
+ {"sv", "swe"},
+ {"sw", "swa"},
+ {"ta", "tam"},
+ {"te", "tel"},
+ {"tg", "tgk"},
+ {"th", "tha"},
+ {"ti", "tir"},
+ {"tk", "tuk"},
+ {"tl", "tgl"},
+ {"tn", "tsn"},
+ {"to", "ton"},
+ {"tr", "tur"},
+ {"ts", "tso"},
+ {"tt", "tat"},
+ {"tw", "twi"},
+ {"ty", "tah"},
+ {"ug", "uig"},
+ {"uk", "ukr"},
+ {"ur", "urd"},
+ {"uz", "uzb"},
+ {"ve", "ven"},
+ {"vi", "vie"},
+ {"vo", "vol"},
+ {"wa", "wln"},
+ {"wo", "wol"},
+ {"xh", "xho"},
+ {"yi", "yid"},
+ {"yo", "yor"},
+ {"za", "zha"},
+ {"zh", "chi"},
+ {"zho", "chi"},
+ {"zu", "zul"},
+};
+
+static int lang_compare(const void *key, const void *lang)
+{
+ return bstrcasecmp0(*(const bstr*)key, ((const struct lang*)lang)->match);
+}
+
+static bstr canonicalize(bstr lang)
+{
+ const struct lang *l = bsearch(&lang, langmap, MP_ARRAY_SIZE(langmap),
+ sizeof(langmap[0]), &lang_compare);
+ return l ? bstr0(l->canonical) : lang;
+}
+
+int mp_match_lang(char **langs, const char *lang)
+{
+ if (!lang)
+ return 0;
+
+ void *ta_ctx = talloc_new(NULL);
+ int lang_parts_n = 0;
+ bstr *lang_parts = NULL;
+ bstr rest = bstr0(lang);
+ while (rest.len) {
+ bstr s = bstr_split(rest, "-", &rest);
+ MP_TARRAY_APPEND(ta_ctx, lang_parts, lang_parts_n, s);
+ }
+
+ int best_score = 0;
+ if (!lang_parts_n)
+ goto done;
+
+ for (int idx = 0; langs && langs[idx]; idx++) {
+ rest = bstr0(langs[idx]);
+ int part = 0;
+ int score = 0;
+ while (rest.len) {
+ bstr s = bstr_split(rest, "-", &rest);
+ if (!part) {
+ if (bstrcasecmp(canonicalize(lang_parts[0]), canonicalize(s)))
+ break;
+ score = INT_MAX - idx;
+ part++;
+ continue;
+ }
+
+ if (part >= lang_parts_n)
+ break;
+
+ if (bstrcasecmp(lang_parts[part], s))
+ score -= 1000;
+
+ part++;
+ }
+ score -= (lang_parts_n - part) * 1000;
+ best_score = MPMAX(best_score, score);
+ }
+
+done:
+ talloc_free(ta_ctx);
+ return best_score;
+}
diff --git a/misc/language.h b/misc/language.h
index ef9388fb8b..d765e6614a 100644
--- a/misc/language.h
+++ b/misc/language.h
@@ -20,6 +20,8 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H
+// Result numerically higher => better match. 0 == no match.
+int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
#endif /* MP_LANGUAGE_H */