summary | refs | log | tree | commit | diff | stats
path: root/misc
diff options
context:
space:
mode:
author Kacper Michajłow <kasper93@gmail.com> 2024-04-17 18:09:55 +0200
committer Kacper Michajłow <kasper93@gmail.com> 2024-05-09 17:12:55 +0200
commit 40ba63405fe732c62a8d43fa6ca3f7a8c7824d4b (patch)
tree e0bc17ad041e459561034a31ffc3ad42e82e55ba /misc
parent 65c71b164335a6af49591ff775200ca4f274ab6c (diff)
download mpv-40ba63405fe732c62a8d43fa6ca3f7a8c7824d4b.tar.bz2
mpv-40ba63405fe732c62a8d43fa6ca3f7a8c7824d4b.tar.xz
Revert "misc: add language-matching utilities"
This reverts commit 8c8d97c26c8b6bef9b8d763db2091e186205ab98.
Diffstat (limited to 'misc')
-rw-r--r-- misc/language.c 361
-rw-r--r-- misc/language.h 6
2 files changed, 0 insertions, 367 deletions
diff --git a/misc/language.c b/misc/language.c
deleted file mode 100644
index b94dd8eaf2..0000000000
--- a/misc/language.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Language code utility functions
- *
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "language.h"
-
-#include "common/common.h"
-#include "osdep/strnlen.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-static const struct lang {
- char match[4];
- char canonical[4];
-} langmap[] = {
- {"aa", "aar"},
- {"ab", "abk"},
- {"ae", "ave"},
- {"af", "afr"},
- {"ak", "aka"},
- {"am", "amh"},
- {"an", "arg"},
- {"ar", "ara"},
- {"as", "asm"},
- {"av", "ava"},
- {"ay", "aym"},
- {"az", "aze"},
- {"ba", "bak"},
- {"be", "bel"},
- {"bg", "bul"},
- {"bh", "bih"},
- {"bi", "bis"},
- {"bm", "bam"},
- {"bn", "ben"},
- {"bo", "tib"},
- {"bod", "tib"},
- {"br", "bre"},
- {"bs", "bos"},
- {"ca", "cat"},
- {"ce", "che"},
- {"ces", "cze"},
- {"ch", "cha"},
- {"co", "cos"},
- {"cr", "cre"},
- {"cs", "cze"},
- {"cu", "chu"},
- {"cv", "chv"},
- {"cy", "wel"},
- {"cym", "wel"},
- {"da", "dan"},
- {"de", "ger"},
- {"deu", "ger"},
- {"dv", "div"},
- {"dz", "dzo"},
- {"ee", "ewe"},
- {"el", "gre"},
- {"ell", "gre"},
- {"en", "eng"},
- {"eo", "epo"},
- {"es", "spa"},
- {"et", "est"},
- {"eu", "baq"},
- {"eus", "baq"},
- {"fa", "per"},
- {"fas", "per"},
- {"ff", "ful"},
- {"fi", "fin"},
- {"fj", "fij"},
- {"fo", "fao"},
- {"fr", "fre"},
- {"fra", "fre"},
- {"fy", "fry"},
- {"ga", "gle"},
- {"gd", "gla"},
- {"gl", "glg"},
- {"gn", "grn"},
- {"gu", "guj"},
- {"gv", "glv"},
- {"ha", "hau"},
- {"he", "heb"},
- {"hi", "hin"},
- {"ho", "hmo"},
- {"hr", "hrv"},
- {"ht", "hat"},
- {"hu", "hun"},
- {"hy", "arm"},
- {"hye", "arm"},
- {"hz", "her"},
- {"ia", "ina"},
- {"id", "ind"},
- {"ie", "ile"},
- {"ig", "ibo"},
- {"ii", "iii"},
- {"ik", "ipk"},
- {"io", "ido"},
- {"is", "ice"},
- {"isl", "ice"},
- {"it", "ita"},
- {"iu", "iku"},
- {"ja", "jpn"},
- {"jv", "jav"},
- {"ka", "geo"},
- {"kat", "geo"},
- {"kg", "kon"},
- {"ki", "kik"},
- {"kj", "kua"},
- {"kk", "kaz"},
- {"kl", "kal"},
- {"km", "khm"},
- {"kn", "kan"},
- {"ko", "kor"},
- {"kr", "kau"},
- {"ks", "kas"},
- {"ku", "kur"},
- {"kv", "kom"},
- {"kw", "cor"},
- {"ky", "kir"},
- {"la", "lat"},
- {"lb", "ltz"},
- {"lg", "lug"},
- {"li", "lim"},
- {"ln", "lin"},
- {"lo", "lao"},
- {"lt", "lit"},
- {"lu", "lub"},
- {"lv", "lav"},
- {"mg", "mlg"},
- {"mh", "mah"},
- {"mi", "mao"},
- {"mk", "mac"},
- {"mkd", "mac"},
- {"ml", "mal"},
- {"mn", "mon"},
- {"mr", "mar"},
- {"mri", "mao"},
- {"ms", "may"},
- {"msa", "may"},
- {"mt", "mlt"},
- {"my", "bur"},
- {"mya", "bur"},
- {"na", "nau"},
- {"nb", "nob"},
- {"nd", "nde"},
- {"ne", "nep"},
- {"ng", "ndo"},
- {"nl", "dut"},
- {"nld", "dut"},
- {"nn", "nno"},
- {"no", "nor"},
- {"nr", "nbl"},
- {"nv", "nav"},
- {"ny", "nya"},
- {"oc", "oci"},
- {"oj", "oji"},
- {"om", "orm"},
- {"or", "ori"},
- {"os", "oss"},
- {"pa", "pan"},
- {"pi", "pli"},
- {"pl", "pol"},
- {"ps", "pus"},
- {"pt", "por"},
- {"qu", "que"},
- {"rm", "roh"},
- {"rn", "run"},
- {"ro", "rum"},
- {"ron", "rum"},
- {"ru", "rus"},
- {"rw", "kin"},
- {"sa", "san"},
- {"sc", "srd"},
- {"sd", "snd"},
- {"se", "sme"},
- {"sg", "sag"},
- {"si", "sin"},
- {"sk", "slo"},
- {"sl", "slv"},
- {"slk", "slo"},
- {"sm", "smo"},
- {"sn", "sna"},
- {"so", "som"},
- {"sq", "alb"},
- {"sqi", "alb"},
- {"sr", "srp"},
- {"ss", "ssw"},
- {"st", "sot"},
- {"su", "sun"},
- {"sv", "swe"},
- {"sw", "swa"},
- {"ta", "tam"},
- {"te", "tel"},
- {"tg", "tgk"},
- {"th", "tha"},
- {"ti", "tir"},
- {"tk", "tuk"},
- {"tl", "tgl"},
- {"tn", "tsn"},
- {"to", "ton"},
- {"tr", "tur"},
- {"ts", "tso"},
- {"tt", "tat"},
- {"tw", "twi"},
- {"ty", "tah"},
- {"ug", "uig"},
- {"uk", "ukr"},
- {"ur", "urd"},
- {"uz", "uzb"},
- {"ve", "ven"},
- {"vi", "vie"},
- {"vo", "vol"},
- {"wa", "wln"},
- {"wo", "wol"},
- {"xh", "xho"},
- {"yi", "yid"},
- {"yo", "yor"},
- {"za", "zha"},
- {"zh", "chi"},
- {"zho", "chi"},
- {"zu", "zul"},
-};
-
-struct langsearch {
- const char *str;
- size_t size;
-};
-
-static int lang_compare(const void *s, const void *k)
-{
- const struct langsearch *search = s;
- const struct lang *key = k;
-
- int ret = strncasecmp(search->str, key->match, search->size);
- if (!ret && search->size < sizeof(key->match) && key->match[search->size])
- return 1;
- return ret;
-}
-
-static void canonicalize(const char **lang, size_t *size)
-{
- if (*size > sizeof(langmap[0].match))
- return;
-
- struct langsearch search = {*lang, *size};
- struct lang *l = bsearch(&search, langmap, MP_ARRAY_SIZE(langmap), sizeof(langmap[0]),
- &lang_compare);
-
- if (l) {
- *lang = l->canonical;
- *size = strnlen(l->canonical, sizeof(l->canonical));
- }
-}
-
-static bool tag_matches(const char *l1, size_t s1, const char *l2, size_t s2)
-{
- return s1 == s2 && !strncasecmp(l1, l2, s1);
-}
-
-int mp_match_lang_single(const char *l1, const char *l2)
-{
- // We never consider null or empty strings to match
- if (!l1 || !l2 || !*l1 || !*l2)
- return 0;
-
- // The first subtag should always be a language; canonicalize to 3-letter ISO 639-2B (arbitrarily chosen)
- size_t s1 = strcspn(l1, "-_");
- size_t s2 = strcspn(l2, "-_");
-
- const char *l1c = l1;
- const char *l2c = l2;
- size_t s1c = s1;
- size_t s2c = s2;
-
- canonicalize(&l1c, &s1c);
- canonicalize(&l2c, &s2c);
-
- // If the first subtags don't match, we have no match at all
- if (!tag_matches(l1c, s1c, l2c, s2c))
- return 0;
-
- // Attempt to match each subtag in each string against each in the other
- int score = 1;
- bool x1 = false;
- int count = 0;
- for (;;) {
- l1 += s1;
-
- while (*l1 == '-' || *l1 == '_')
- l1++;
-
- if (!*l1)
- break;
-
- s1 = strcspn(l1, "-_");
- if (tag_matches(l1, s1, "x", 1)) {
- x1 = true;
- continue;
- }
-
- const char *l2o = l2;
- size_t s2o = s2;
- bool x2 = false;
- for (;;) {
- l2 += s2;
-
- while (*l2 == '-' || *l2 == '_')
- l2++;
-
- if (!*l2)
- break;
-
- s2 = strcspn(l2, "-_");
- if (tag_matches(l2, s2, "x", 1)) {
- x2 = true;
- if (!x1)
- break;
- continue;
- }
-
- // Private-use subtags only match against other private-use subtags
- if (x1 && !x2)
- continue;
-
- if (tag_matches(l1c, s1c, l2c, s2c)) {
- // Matches for subtags earlier in the user's string take priority over later ones,
- // for up to LANGUAGE_SCORE_BITS subtags
- int shift = (LANGUAGE_SCORE_BITS - count - 1);
- if (shift < 0)
- shift = 0;
- score += (1 << shift);
-
- if (score >= LANGUAGE_SCORE_MAX)
- return LANGUAGE_SCORE_MAX;
- }
- }
-
- l2 = l2o;
- s2 = s2o;
-
- count++;
- }
-
- return score;
-}
diff --git a/misc/language.h b/misc/language.h
index 250d39137c..ef9388fb8b 100644
--- a/misc/language.h
+++ b/misc/language.h
@@ -20,12 +20,6 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H
-#define LANGUAGE_SCORE_BITS 16
-#define LANGUAGE_SCORE_MAX (1 << LANGUAGE_SCORE_BITS)
-
-// Where applicable, l1 is the user-specified code and l2 is the code being checked against it
-int mp_match_lang_single(const char *l1, const char *l2);
-
char **mp_get_user_langs(void);
#endif /* MP_LANGUAGE_H */