diff options
author | Guido Cella <guido@guidocella.xyz> | 2024-05-09 21:37:53 +0200 |
---|---|---|
committer | Kacper Michajłow <kasper93@gmail.com> | 2024-05-10 01:22:31 +0200 |
commit | 20b8fe05bf6103e1797191929056cc2bbeec4516 (patch) | |
tree | cd915d400d146cac1c6b30bafa51dafde4fccbed /misc | |
parent | dffc37dcfaaee26ec0789738c3f1c715d44696a4 (diff) | |
download | mpv-20b8fe05bf6103e1797191929056cc2bbeec4516.tar.bz2 mpv-20b8fe05bf6103e1797191929056cc2bbeec4516.tar.xz |
misc/language: move mp_guess_lang_from_filename() here
Diffstat (limited to 'misc')
-rw-r--r-- | misc/language.c | 55 | ||||
-rw-r--r-- | misc/language.h | 3 |
2 files changed, 57 insertions, 1 deletion
```diff
diff --git a/misc/language.c b/misc/language.c
index 0d9d0689da..028437f1b6 100644
--- a/misc/language.c
+++ b/misc/language.c
@@ -21,7 +21,7 @@
 #include <stdint.h>
 
 #include "common/common.h"
-#include "misc/bstr.h"
+#include "misc/ctype.h"
 
 #define L(s) { #s, sizeof(#s) - 1 }
 
@@ -296,3 +296,56 @@ done:
     talloc_free(ta_ctx);
     return best_score;
 }
+
+bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
+{
+    name = bstr_strip(bstr_strip_ext(name));
+
+    if (name.len < 2)
+        return (bstr){0};
+
+    int lang_length = 0;
+    int i = name.len - 1;
+    int suffixes_length = 0;
+
+    char delimiter = '.';
+    if (name.start[i] == ')') {
+        delimiter = '(';
+        i--;
+    }
+    if (name.start[i] == ']') {
+        delimiter = '[';
+        i--;
+    }
+
+    while (true) {
+        while (i >= 0 && mp_isalpha(name.start[i])) {
+            lang_length++;
+            i--;
+        }
+
+        // According to
+        // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
+        // subtags after the first are composed of 1 to 8 letters.
+        if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8)
+            return (bstr){0};
+
+        if (i >= 0 && name.start[i] == '-') {
+            lang_length++;
+            i--;
+            suffixes_length = lang_length;
+        } else {
+            break;
+        }
+    }
+
+    // The primary subtag can have 2 or 3 letters.
+    if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 ||
+        i <= 0 || name.start[i] != delimiter)
+        return (bstr){0};
+
+    if (lang_start)
+        *lang_start = i;
+
+    return (bstr){name.start + i + 1, lang_length};
+}
diff --git a/misc/language.h b/misc/language.h
index d765e6614a..ed57e75d4d 100644
--- a/misc/language.h
+++ b/misc/language.h
@@ -20,8 +20,11 @@
 #ifndef MP_LANGUAGE_H
 #define MP_LANGUAGE_H
 
+#include "misc/bstr.h"
+
 // Result numerically higher => better match. 0 == no match.
 int mp_match_lang(char **langs, const char *lang);
 char **mp_get_user_langs(void);
+bstr mp_guess_lang_from_filename(bstr name, int *lang_start);
 
 #endif /* MP_LANGUAGE_H */
```