summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorApache553 <Apache553@outlook.com>2022-01-10 14:33:42 +0800
committerApache553 <Apache553@outlook.com>2022-01-30 01:58:36 +0800
commitf57084876060662d47f377e5735913215c04b329 (patch)
tree673dd02e1c5b683cee9afc5d323166472635aaa7
parentbfa1dbe8680f5b709828a4b317992a793cf26856 (diff)
downloadlibass-f57084876060662d47f377e5735913215c04b329.tar.bz2
libass-f57084876060662d47f377e5735913215c04b329.tar.xz
font: use iconv to handle non-unicode cmap microsoft fonts
Use Windows native API to convert Unicode to MBCS on Windows, then use iconv if not on Windows and iconv is available. Do nothing if neither is available. fix: `ass_font_index_magic` now doesn't mangle codepoint if charmap isn't Microsoft's.
-rw-r--r--libass/ass_font.c173
1 files changed, 167 insertions, 6 deletions
diff --git a/libass/ass_font.c b/libass/ass_font.c
index 79ab22b..3f44a1f 100644
--- a/libass/ass_font.c
+++ b/libass/ass_font.c
@@ -26,6 +26,7 @@
#include FT_GLYPH_H
#include FT_TRUETYPE_TABLES_H
#include FT_OUTLINE_H
+#include FT_TRUETYPE_IDS_H
#include <limits.h>
#include "ass.h"
@@ -36,6 +37,156 @@
#include "ass_shaper.h"
/**
+ * Get the MBCS code point from the output bytes of iconv/WideCharToMultiByte
+ * by reading the bytes as a big-endian integer with leading zero bytes omitted.
+ */
+static inline uint32_t pack_mbcs_bytes(const char *bytes, size_t length)
+{
+    // Fold the bytes in order, so the first byte ends up most significant.
+    // Each byte is taken as an unsigned 8-bit value before being merged.
+    uint32_t packed = 0;
+    for (size_t remaining = length; remaining > 0; --remaining)
+        packed = (packed << 8) | (uint8_t) *bytes++;
+    return packed;
+}
+
+/**
+ * Convert a UCS-4 code unit to a packed uint32_t in given multibyte encoding.
+ *
+ * Cygwin is deliberately not excluded from the Windows branch: only
+ * WideCharToMultiByte is used, which should not violate any Cygwin
+ * restrictions on Windows APIs.
+ */
+#if defined(_WIN32)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/*
+ * Windows implementation: translate one Unicode code point into the packed
+ * multibyte code of the given Microsoft charmap encoding, using
+ * WideCharToMultiByte. Returns the code point unchanged when the encoding is
+ * not one of the handled MS encodings or when the conversion fails.
+ */
+static uint32_t convert_unicode_to_mb(FT_Encoding encoding, uint32_t codepoint)
+{
+    // Windows code page corresponding to the FreeType encoding
+    UINT code_page;
+    switch (encoding) {
+    case FT_ENCODING_MS_SJIS:    code_page = 932;  break;
+    case FT_ENCODING_MS_GB2312:  code_page = 936;  break;
+    case FT_ENCODING_MS_BIG5:    code_page = 950;  break;
+    case FT_ENCODING_MS_WANSUNG: code_page = 949;  break;
+    case FT_ENCODING_MS_JOHAB:   code_page = 1361; break;
+    default:
+        return codepoint;
+    }
+
+    // UTF-16 encode the input, assuming codepoint > 0 && codepoint <= 10FFFF
+    WCHAR utf16[2];
+    int utf16_units;
+    if (codepoint < 0x10000) {
+        utf16[0] = codepoint;
+        utf16_units = 1;
+    } else {
+        // surrogate pair; 0xD7C0 is 0xD800 - (0x10000 >> 10)
+        utf16[0] = 0xD7C0 + (codepoint >> 10);
+        utf16[1] = 0xDC00 + (codepoint & 0x3FF);
+        utf16_units = 2;
+    }
+
+    // At most two output bytes are accepted; the BOOL receives the
+    // "default character used" flag of WideCharToMultiByte.
+    char mb_bytes[2];
+    BOOL used_default_char;
+    int mb_length = WideCharToMultiByte(code_page, WC_NO_BEST_FIT_CHARS,
+                                        utf16, utf16_units,
+                                        mb_bytes, sizeof(mb_bytes),
+                                        NULL, &used_default_char);
+    if (mb_length != 0 && !used_default_char)
+        return pack_mbcs_bytes(mb_bytes, mb_length);
+
+    // conversion failed or the character is not representable
+    return codepoint;
+}
+#elif defined(CONFIG_ICONV)
+
+#include <iconv.h>
+
+/*
+ * iconv implementation: translate one Unicode code point into the packed
+ * multibyte code of the given Microsoft charmap encoding.
+ *
+ * Returns the code point unchanged when the encoding is not one of the
+ * handled MS encodings, when no suitable iconv converter can be opened,
+ * when iconv reports an error or a non-reversible conversion, when the
+ * result does not fit in two bytes, or when iconv produces no output.
+ */
+static uint32_t convert_unicode_to_mb(FT_Encoding encoding, uint32_t codepoint)
+{
+    typedef struct { const char *names[5]; } EncodingList;
+
+    EncodingList encoding_list;
+
+    // map freetype encoding to candidate iconv encoding names
+    // (tried in order; each list is NULL-terminated)
+    switch (encoding) {
+    case FT_ENCODING_MS_SJIS:
+        encoding_list = (EncodingList) {{"CP932", "SHIFT_JIS", NULL}};
+        break;
+    case FT_ENCODING_MS_GB2312:
+        encoding_list = (EncodingList) {{"CP936", "GBK", "GB18030", "GB2312", NULL}};
+        break;
+    case FT_ENCODING_MS_BIG5:
+        encoding_list = (EncodingList) {{"CP950", "BIG5", NULL}};
+        break;
+    case FT_ENCODING_MS_WANSUNG:
+        encoding_list = (EncodingList) {{"CP949", "EUC-KR", NULL}};
+        break;
+    case FT_ENCODING_MS_JOHAB:
+        encoding_list = (EncodingList) {{"CP1361", "JOHAB", NULL}};
+        break;
+    default:
+        return codepoint;
+    }
+
+    // open iconv context with the first name this iconv accepts
+    const char **encoding_str = encoding_list.names;
+    iconv_t cd = (iconv_t) -1;
+    while (*encoding_str) {
+        cd = iconv_open(*encoding_str, "UTF-32LE");
+        if (cd != (iconv_t) -1) break;
+        ++encoding_str;
+    }
+    if (cd == (iconv_t) -1)
+        return codepoint;
+
+    char input_buffer[4];
+    char output_buffer[2]; // MS-flavour encodings only need 2 bytes
+    uint32_t result = codepoint; // returned as-is on any failure below
+
+    // convert input codepoint to little endian uint32_t bytearray
+    for (int i = 0; i < 4; ++i)
+        input_buffer[i] = (codepoint >> (8 * i)) & 0xFF;
+
+    // do actual convert, only reversible converts are valid, since we are
+    // converting unicode to something else; any nonzero return (error or a
+    // count of non-reversible conversions) is treated as failure
+    size_t inbuf_size = sizeof(input_buffer);
+    size_t outbuf_size = sizeof(output_buffer);
+    char *inbuf = input_buffer;
+    char *outbuf = output_buffer;
+    if (iconv(cd, &inbuf, &inbuf_size, &outbuf, &outbuf_size))
+        goto clean;
+
+    // now we have multibyte string in output_buffer
+    // assemble those bytes into uint32_t; if iconv reported success but
+    // wrote nothing, keep the unconverted codepoint like every other
+    // failure path instead of returning code 0
+    size_t output_length = sizeof(output_buffer) - outbuf_size;
+    if (output_length > 0)
+        result = pack_mbcs_bytes(output_buffer, output_length);
+
+clean:
+    iconv_close(cd);
+    return result;
+}
+#else
+/* Fallback used when neither the Windows API nor iconv is available. */
+static uint32_t convert_unicode_to_mb(FT_Encoding encoding, uint32_t codepoint)
+{
+    (void) encoding; // no conversion backend: the encoding is irrelevant
+    return codepoint;
+}
+#endif
+
+/**
* Select a good charmap, prefer Microsoft Unicode charmaps.
* Otherwise, let FreeType decide.
*/
@@ -79,7 +230,7 @@ void charmap_magic(ASS_Library *library, FT_Face face)
/**
* Adjust char index if the charmap is weird
- * (currently just MS Symbol)
+ * (currently all non-Unicode Microsoft cmaps)
*/
uint32_t ass_font_index_magic(FT_Face face, uint32_t symbol)
@@ -87,12 +238,22 @@ uint32_t ass_font_index_magic(FT_Face face, uint32_t symbol)
if (!face->charmap)
return symbol;
- switch (face->charmap->encoding) {
- case FT_ENCODING_MS_SYMBOL:
- return 0xF000 | symbol;
- default:
- return symbol;
+ if (face->charmap->platform_id == TT_PLATFORM_MICROSOFT) {
+ switch (face->charmap->encoding) {
+ case FT_ENCODING_MS_SYMBOL:
+ return 0xF000 | symbol;
+ case FT_ENCODING_MS_SJIS:
+ case FT_ENCODING_MS_GB2312:
+ case FT_ENCODING_MS_BIG5:
+ case FT_ENCODING_MS_WANSUNG:
+ case FT_ENCODING_MS_JOHAB:
+ return convert_unicode_to_mb(face->charmap->encoding, symbol);
+ default:
+ return symbol;
+ }
}
+
+ return symbol;
}
static void set_font_metrics(FT_Face face)