summary | refs | log | tree | commit | diff | stats
path: root/libass/ass_utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'libass/ass_utils.c')
-rw-r--r-- libass/ass_utils.c 76
1 files changed, 76 insertions, 0 deletions
diff --git a/libass/ass_utils.c b/libass/ass_utils.c
index 0758f7dbb7..6c3f9f0e77 100644
--- a/libass/ass_utils.c
+++ b/libass/ass_utils.c
@@ -22,9 +22,14 @@
#include <stdlib.h>
#include <inttypes.h>
+#include <string.h>
#include <sys/time.h>
#include <time.h>
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
#include "mp_msg.h"
#include "ass_utils.h"
@@ -81,3 +86,74 @@ int strtocolor(char** q, uint32_t* res)
return result;
}
+unsigned ass_utf8_get_char(char **str) /*
+ * Decode one character from the byte string at *str and advance *str past
+ * the bytes that were consumed.
+ *
+ * A first byte with 2 to 5 leading 1 bits, followed by exactly the
+ * announced number (1 to 4) of continuation bytes of the form 10xxxxxx,
+ * yields the value assembled from the payload bits, and *str is moved past
+ * the whole sequence. No check for overlong encodings is performed.
+ *
+ * Any other input (a byte without the high bit, a stray continuation byte,
+ * too many leading 1 bits, too few or too many continuation bytes) yields
+ * the first byte unchanged and *str is advanced by exactly one byte. This
+ * includes a 0 byte: the function returns 0 and still steps past it.
+ */
+{
+ uint8_t *strp = (uint8_t *)*str;
+ unsigned c = *strp++; /* first byte; strp now points at the byte after it */
+ unsigned mask = 0x80;
+ int len = -1; /* becomes (number of leading 1 bits in c) - 1 */
+ while (c & mask) { /* walk down the leading 1 bits of the first byte */
+ mask >>= 1;
+ len++;
+ }
+ if (len <= 0 || len > 4) /* -1: high bit clear; 0: 10xxxxxx; > 4: too many leading 1 bits */
+ goto no_utf8;
+ c &= mask - 1; /* keep only the payload bits below the length prefix */
+ while ((*strp & 0xc0) == 0x80) { /* consume continuation bytes (10xxxxxx) */
+ if (len-- <= 0) /* one more continuation byte than the first byte announced */
+ goto no_utf8;
+ c = (c << 6) | (*strp++ & 0x3f); /* append the 6 payload bits */
+ }
+ if (len) /* sequence stopped before all announced continuation bytes were seen */
+ goto no_utf8;
+ *str = (char *)strp;
+ return c;
+
+no_utf8: /* fallback: hand back the first byte as-is and advance by one byte */
+ strp = (uint8_t *)*str;
+ c = *strp++;
+ *str = (char *)strp;
+ return c;
+}
+
+#ifdef HAVE_ENCA
+/**
+ * \brief Guess the charset of a buffer with ENCA.
+ * \param buffer data to analyse; detection is skipped if NULL
+ * \param buflen number of bytes in buffer; detection is skipped if <= 0
+ * \param preferred_language ENCA language name, matched case-insensitively
+ *        against the list returned by enca_get_languages(); detection is
+ *        skipped if NULL
+ * \param fallback charset name used when nothing was detected; may be NULL
+ * \return a strdup()-allocated charset name (the detected one, otherwise a
+ *         copy of fallback), or NULL if nothing was detected and fallback is
+ *         NULL, or if strdup() failed
+ *
+ * The list of supported languages is logged at verbose level on every call.
+ */
+void* ass_guess_buffer_cp(unsigned char* buffer, int buflen, char *preferred_language, char *fallback)
+{
+    const char **languages;
+    size_t langcnt = 0;
+    char *detected_sub_cp = NULL;
+    size_t i;
+
+    languages = enca_get_languages(&langcnt);
+    if (!languages)
+        langcnt = 0; /* never index a missing list */
+
+    mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
+    for (i = 0; i < langcnt; i++) {
+        mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
+    }
+    mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
+
+    /* strcasecmp() on NULL is undefined, and a negative length would be
+     * converted to a huge size_t by enca_analyse_const(). */
+    if (preferred_language && buffer && buflen > 0) {
+        for (i = 0; i < langcnt; i++) {
+            EncaAnalyser analyser;
+            EncaEncoding encoding;
+            const char *tmp;
+
+            if (strcasecmp(languages[i], preferred_language) != 0)
+                continue;
+
+            analyser = enca_analyser_alloc(languages[i]);
+            if (!analyser)
+                break; /* cannot analyse; use the fallback below */
+
+            encoding = enca_analyse_const(analyser, buffer, buflen);
+            tmp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+            if (tmp && encoding.charset != ENCA_CS_UNKNOWN) {
+                detected_sub_cp = strdup(tmp);
+                mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", tmp);
+            }
+            enca_analyser_free(analyser);
+            /* Stop at the first matching language so a later match cannot
+             * overwrite (and leak) the string duplicated above. */
+            break;
+        }
+    }
+
+    free(languages);
+
+    if (!detected_sub_cp && fallback) {
+        detected_sub_cp = strdup(fallback);
+        mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detection failed: fallback to %s\n", fallback);
+    }
+
+    return detected_sub_cp;
+}
+#endif