From ca8937d7d269c0ef8881d2ac7a227fdb990a5753 Mon Sep 17 00:00:00 2001
From: wm4
Date: Wed, 15 Jan 2014 16:13:07 +0100
Subject: bstr: add function for splitting UTF-8

---
 bstr/bstr.c | 11 +++++++++++
 bstr/bstr.h |  8 +++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/bstr/bstr.c b/bstr/bstr.c
index aacbdc7dbc..964934a100 100644
--- a/bstr/bstr.c
+++ b/bstr/bstr.c
@@ -296,6 +296,17 @@ int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
     return codepoint;
 }
 
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
+{
+    bstr rest;
+    int code = bstr_decode_utf8(str, &rest);
+    if (code < 0)
+        return (bstr){0};
+    if (out_next)
+        *out_next = rest;
+    return bstr_splice(str, 0, str.len - rest.len);
+}
+
 int bstr_validate_utf8(struct bstr s)
 {
     while (s.len) {
diff --git a/bstr/bstr.h b/bstr/bstr.h
index 71d5d473c4..01fe2261a5 100644
--- a/bstr/bstr.h
+++ b/bstr/bstr.h
@@ -81,13 +81,19 @@ double bstrtod(struct bstr str, struct bstr *rest);
 void bstr_lower(struct bstr str);
 int bstr_sscanf(struct bstr str, const char *format, ...);
 
-// Decode the UTF-8 code point at the start of the string,, and return the
+// Decode the UTF-8 code point at the start of the string, and return the
 // character.
 // After calling this function, *out_next will point to the next character.
 // out_next can be NULL.
 // On error, -1 is returned, and *out_next is not modified.
 int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
 
+// Return the UTF-8 code point at the start of the string.
+// After calling this function, *out_next will point to the next character.
+// out_next can be NULL.
+// On error, an empty string is returned, and *out_next is not modified.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);
+
 // Return the length of the UTF-8 sequence that starts with the given byte.
 // Given a string char *s, the next UTF-8 code point is to be expected at
 // s + bstr_parse_utf8_code_length(s[0])
-- 
cgit v1.2.3