summary | refs | log | tree | commit | diff | stats
path: root/misc
diff options
context:
space:
mode:
author: Guido Cella <guido@guidocella.xyz> 2024-05-09 21:37:53 +0200
committer: Kacper Michajłow <kasper93@gmail.com> 2024-05-10 01:22:31 +0200
commit: 20b8fe05bf6103e1797191929056cc2bbeec4516 (patch)
tree: cd915d400d146cac1c6b30bafa51dafde4fccbed /misc
parent: dffc37dcfaaee26ec0789738c3f1c715d44696a4 (diff)
download: mpv-20b8fe05bf6103e1797191929056cc2bbeec4516.tar.bz2
download: mpv-20b8fe05bf6103e1797191929056cc2bbeec4516.tar.xz
misc/language: move mp_guess_lang_from_filename() here
Diffstat (limited to 'misc')
-rw-r--r-- misc/language.c 55
-rw-r--r-- misc/language.h 3
2 files changed, 57 insertions, 1 deletions
diff --git a/misc/language.c b/misc/language.c
index 0d9d0689da..028437f1b6 100644
--- a/misc/language.c
+++ b/misc/language.c
@@ -21,7 +21,7 @@
#include <stdint.h>
#include "common/common.h"
-#include "misc/bstr.h"
+#include "misc/ctype.h"
#define L(s) { #s, sizeof(#s) - 1 }
@@ -296,3 +296,56 @@ done:
talloc_free(ta_ctx);
return best_score;
}
+
+// Guess the language tag embedded at the end of a file name.
+//
+// The extension and surrounding whitespace are removed first (bstr_strip_ext()
+// and bstr_strip()). The remaining name must then end in one of
+//     <prefix>.<tag>      <prefix>(<tag>)      <prefix>[<tag>]
+// where <prefix> is non-empty and <tag> is a primary subtag of 2 or 3 letters,
+// optionally followed by '-'-separated subtags of 1 to 8 letters each
+// (for example "en" or "pt-BR"). Only letters accepted by mp_isalpha() count.
+//
+// name:       file name to inspect.
+// lang_start: may be NULL. On success it receives the index of the opening
+//             delimiter ('.', '(' or '[') relative to the stripped name.
+//             It is not written when no tag is found.
+// Returns the tag (without delimiters) as a bstr pointing into the stripped
+// name, or an empty bstr if the name does not end in a recognizable tag.
+bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
+{
+    name = bstr_strip(bstr_strip_ext(name));
+
+    if (name.len < 2)
+        return (bstr){0};
+
+    // Total length of the tag scanned so far, including '-' separators.
+    int lang_length = 0;
+    // Scan position; the name is walked backwards from its last character.
+    int i = name.len - 1;
+    // Length of everything after the subtag currently being scanned,
+    // including the '-' that separates it from that subtag.
+    int suffixes_length = 0;
+
+    // A trailing ')' or ']' selects the matching opening bracket as the
+    // delimiter. The cases are mutually exclusive so that a mismatched tail
+    // such as "[en])" is rejected instead of being parsed like "[en]".
+    char delimiter = '.';
+    if (name.start[i] == ')') {
+        delimiter = '(';
+        i--;
+    } else if (name.start[i] == ']') {
+        delimiter = '[';
+        i--;
+    }
+
+    while (true) {
+        while (i >= 0 && mp_isalpha(name.start[i])) {
+            lang_length++;
+            i--;
+        }
+
+        // According to
+        // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
+        // subtags after the first are composed of 1 to 8 letters.
+        if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8)
+            return (bstr){0};
+
+        if (i >= 0 && name.start[i] == '-') {
+            // Another subtag precedes this one: account for the '-' and
+            // remember where the already validated suffixes end.
+            lang_length++;
+            i--;
+            suffixes_length = lang_length;
+        } else {
+            break;
+        }
+    }
+
+    // The primary subtag can have 2 or 3 letters. It must be preceded by the
+    // expected delimiter, and the delimiter must not be the first character
+    // of the name (i <= 0), i.e. something has to come before the tag.
+    if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 ||
+        i <= 0 || name.start[i] != delimiter)
+        return (bstr){0};
+
+    if (lang_start)
+        *lang_start = i;
+
+    return (bstr){name.start + i + 1, lang_length};
+}
diff --git a/misc/language.h b/misc/language.h
index d765e6614a..ed57e75d4d 100644
--- a/misc/language.h
+++ b/misc/language.h
@@ -20,8 +20,11 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H
+#include "misc/bstr.h"
+
// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
+bstr mp_guess_lang_from_filename(bstr name, int *lang_start); // trailing language tag of name, or empty bstr if none; *lang_start (optional) gets the delimiter index
#endif /* MP_LANGUAGE_H */