summaryrefslogtreecommitdiffstats
path: root/common
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-12-30 20:28:32 +0100
committerwm4 <wm4@nowhere>2013-12-30 22:49:50 +0100
commit066ecfcbfb0b7120183338c5382e98c609a9d89a (patch)
treec03269c234b3b7edcc3bf19374134e341628dc42 /common
parent097fe8ea6fb25df68077c25c08f29fb57a9d2bd6 (diff)
downloadmpv-066ecfcbfb0b7120183338c5382e98c609a9d89a.tar.bz2
mpv-066ecfcbfb0b7120183338c5382e98c609a9d89a.tar.xz
common: simplify and optimize string escape parsing
This code is shared between input.conf parser and option parser. Until now, the performance didn't really matter. But I want to use this code for JSON parsing too, and since JSON will have to be parsed a lot, it should probably try to avoid realloc'ing too much. This commit moves parsing of C-style escaped strings into a common function, and allows using it in a way realloc can be completely avoided, if the already allocated buffer is large enough.
Diffstat (limited to 'common')
-rw-r--r--common/common.c73
-rw-r--r--common/common.h8
2 files changed, 76 insertions, 5 deletions
diff --git a/common/common.c b/common/common.c
index 365a369425..741dc236b9 100644
--- a/common/common.c
+++ b/common/common.c
@@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint)
return talloc_strndup_append_buffer(buffer, data, output - data);
}
+// Like mp_append_utf8_buffer, but use bstr_xappend().
+void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint)
+{
+ char data[8];
+ uint8_t tmp;
+ char *output = data;
+ PUT_UTF8(codepoint, tmp, *output++ = tmp;);
+ bstr_xappend(talloc_ctx, buf, (bstr){data, output - data});
+}
+
// Parse a C-style escape beginning at code, and append the result to *str
// using talloc. The input string (*code) must point to the first character
// after the initial '\', and after parsing *code is set to the first character
// after the current escape.
// On error, false is returned, and all input remains unchanged.
-bool mp_parse_escape(bstr *code, char **str)
+static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code)
{
if (code->len < 1)
return false;
@@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str)
case '\'': replace = '\''; break;
}
if (replace) {
- *str = talloc_strndup_append_buffer(*str, &replace, 1);
+ bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1});
*code = bstr_cut(*code, 1);
return true;
}
@@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str)
char c = bstrtoll(num, &num, 16);
if (!num.len)
return false;
- *str = talloc_strndup_append_buffer(*str, &c, 1);
+ bstr_xappend(talloc_ctx, dst, (bstr){&c, 1});
*code = bstr_cut(*code, 3);
return true;
}
@@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str)
int c = bstrtoll(num, &num, 16);
if (num.len)
return false;
- *str = mp_append_utf8_buffer(*str, c);
+ mp_append_utf8_bstr(talloc_ctx, dst, c);
*code = bstr_cut(*code, 5);
return true;
}
return false;
}
+
+// Like mp_append_escaped_string, but set *dst to sliced *src if no escape
+// sequences have to be parsed (i.e. no memory allocation is required), and
+// if dst->start was NULL on function entry.
+bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src)
+{
+ bstr t = *src;
+ int cur = 0;
+ while (1) {
+ if (cur >= t.len || t.start[cur] == '"') {
+ *src = bstr_cut(t, cur);
+ t = bstr_splice(t, 0, cur);
+ if (dst->start == NULL) {
+ *dst = t;
+ } else {
+ bstr_xappend(talloc_ctx, dst, t);
+ }
+ return true;
+ } else if (t.start[cur] == '\\') {
+ bstr_xappend(talloc_ctx, dst, bstr_splice(t, 0, cur));
+ t = bstr_cut(t, cur + 1);
+ cur = 0;
+ if (!mp_parse_escape(talloc_ctx, dst, &t))
+ goto error;
+ } else {
+ cur++;
+ }
+ }
+error:
+ return false;
+}
+
+// src is expected to point to a C-style string literal, *src pointing to the
+// first char after the starting '"'. It will append the contents of the literal
+// to *dst (using talloc_ctx) until the first '"' or the end of *str is found.
+// See bstr_xappend() how data is appended to *dst.
+// On success, *src will either start with '"', or be empty.
+// On error, return false, and *dst will contain the string until the first
+// error, *src is not changed.
+// Note that dst->start will be implicitly \0-terminated on successful return,
+// and if it was NULL or \0-terminated before calling the function.
+// As mentioned above, the caller is responsible for skipping the '"' chars.
+bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src)
+{
+ if (mp_append_escaped_string_noalloc(talloc_ctx, dst, src)) {
+ // Guarantee copy (or allocation).
+ if (!dst->start || dst->start == src->start) {
+ bstr res = *dst;
+ *dst = (bstr){0};
+ bstr_xappend(talloc_ctx, dst, res);
+ }
+ return true;
+ }
+ return false;
+}
diff --git a/common/common.h b/common/common.h
index ae2fb8f2d5..7ae18d1b6f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2);
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
struct bstr;
-bool mp_parse_escape(struct bstr *code, char **str);
+
+void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint);
+
+bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst,
+ struct bstr *src);
+bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst,
+ struct bstr *src);
#endif /* MPLAYER_MPCOMMON_H */