summaryrefslogtreecommitdiffstats
path: root/bstr.c
diff options
context:
space:
mode:
author wm4 <wm4@mplayer2.org> 2012-01-13 07:38:40 +0100
committer wm4 <wm4@mplayer2.org> 2012-01-14 14:35:39 +0100
commit 7700e6effca6358820cb969ddf896a2be4b77ede (patch)
tree 23fd1dabddaadb9bad35e0b6a03a0226b48bcc94 /bstr.c
parent 827faa38436f55fbb15b7dce4abcc5c6608a428b (diff)
download mpv-7700e6effca6358820cb969ddf896a2be4b77ede.tar.bz2
mpv-7700e6effca6358820cb969ddf896a2be4b77ede.tar.xz
bstr: add function for UTF-8 parsing (taken from libav)
Parts taken from libavutil's GET_UTF8 and slightly modified.
Diffstat (limited to 'bstr.c')
-rw-r--r-- bstr.c 32
1 files changed, 32 insertions, 0 deletions
diff --git a/bstr.c b/bstr.c
index 219c136d7c..0c46b1d9b0 100644
--- a/bstr.c
+++ b/bstr.c
@@ -201,3 +201,35 @@ int bstr_sscanf(struct bstr str, const char *format, ...)
talloc_free(ptr);
return ret;
}
+
+/*
+ * Classify the first byte of a UTF-8 sequence.
+ *
+ * Returns the total length in bytes of the sequence that b introduces:
+ * 1 when b is below 128, otherwise a value from 2 to 4. Returns -1 when the
+ * computed length falls outside 2..4.
+ *
+ * NOTE(review): av_log2() is declared outside this file; the computation
+ * below presumes it yields the index of the highest set bit of its argument.
+ */
+int bstr_parse_utf8_code_length(unsigned char b)
+{
+    // Top bit clear: a single-byte (ASCII) character.
+    if (!(b & 0x80))
+        return 1;
+    // Inverting the byte turns its run of leading 1 bits into leading 0 bits,
+    // so the highest bit still set marks where that run ended.
+    int lead_ones = 7 - av_log2(b ^ 0xFF);
+    if (lead_ones < 2 || lead_ones > 4)
+        return -1;
+    return lead_ones;
+}
+
+/*
+ * Decode the UTF-8 sequence at the start of s.
+ *
+ * Returns the decoded code point, or -1 if s is empty, ends before the
+ * sequence is complete, or does not start with well-formed UTF-8. Besides bad
+ * lead and continuation bytes, this rejects overlong encodings, UTF-16
+ * surrogates (U+D800..U+DFFF) and values above U+10FFFF.
+ *
+ * On success, if out_next is not NULL, *out_next receives s with the decoded
+ * bytes removed from its front. On failure *out_next is not written.
+ */
+int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
+{
+    if (s.len == 0)
+        return -1;
+    // Go through unsigned char so the result does not depend on whether the
+    // buffer's element type is signed.
+    unsigned int codepoint = (unsigned char)s.start[0];
+    s.start++; s.len--;
+    if (codepoint >= 128) {
+        int bytes = bstr_parse_utf8_code_length(codepoint);
+        if (bytes < 0 || s.len < bytes - 1)
+            return -1;
+        // Strip the length marker, keeping only the lead byte's payload bits.
+        codepoint &= 127 >> bytes;
+        for (int n = 1; n < bytes; n++) {
+            unsigned int tmp = (unsigned char)s.start[0];
+            // Every continuation byte must have the form 10xxxxxx.
+            if ((tmp & 0xC0) != 0x80)
+                return -1;
+            codepoint = (codepoint << 6) | (tmp & 0x3F);
+            s.start++; s.len--;
+        }
+        // Smallest value that actually needs this many bytes; anything lower
+        // is an overlong encoding of a shorter sequence.
+        unsigned int min = bytes == 2 ? 0x80 : bytes == 3 ? 0x800 : 0x10000;
+        if (codepoint < min)
+            return -1;
+        // Surrogates are reserved for UTF-16 and are not valid scalar values.
+        if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
+            return -1;
+        // 4-byte sequences can express values past the end of Unicode.
+        if (codepoint > 0x10FFFF)
+            return -1;
+    }
+    if (out_next)
+        *out_next = s;
+    return codepoint;
+}