summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-12-30 20:28:32 +0100
committerwm4 <wm4@nowhere>2013-12-30 22:49:50 +0100
commit066ecfcbfb0b7120183338c5382e98c609a9d89a (patch)
treec03269c234b3b7edcc3bf19374134e341628dc42
parent097fe8ea6fb25df68077c25c08f29fb57a9d2bd6 (diff)
downloadmpv-066ecfcbfb0b7120183338c5382e98c609a9d89a.tar.bz2
mpv-066ecfcbfb0b7120183338c5382e98c609a9d89a.tar.xz
common: simplify and optimize string escape parsing
This code is shared between input.conf parser and option parser. Until now, the performance didn't really matter. But I want to use this code for JSON parsing too, and since JSON will have to be parsed a lot, it should probably try to avoid realloc'ing too much. This commit moves parsing of C-style escaped strings into a common function, and allows using it in a way realloc can be completely avoided, if the already allocated buffer is large enough.
-rw-r--r--common/common.c73
-rw-r--r--common/common.h8
-rw-r--r--input/cmd_parse.c27
-rw-r--r--options/m_option.c19
4 files changed, 85 insertions, 42 deletions
diff --git a/common/common.c b/common/common.c
index 365a369425..741dc236b9 100644
--- a/common/common.c
+++ b/common/common.c
@@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint)
return talloc_strndup_append_buffer(buffer, data, output - data);
}
+// Counterpart of mp_append_utf8_buffer that appends the PUT_UTF8 output to *buf with bstr_xappend().
+void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint)
+{
+    char encoded[8];
+    char *cursor = encoded;
+    uint8_t octet;
+    PUT_UTF8(codepoint, octet, *cursor++ = octet;);
+    bstr_xappend(talloc_ctx, buf, (bstr){encoded, cursor - encoded});
+}
+
// Parse a C-style escape beginning at code, and append the result to *str
// using talloc. The input string (*code) must point to the first character
// after the initial '\', and after parsing *code is set to the first character
// after the current escape.
// On error, false is returned, and all input remains unchanged.
-bool mp_parse_escape(bstr *code, char **str)
+static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code)
{
if (code->len < 1)
return false;
@@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str)
case '\'': replace = '\''; break;
}
if (replace) {
- *str = talloc_strndup_append_buffer(*str, &replace, 1);
+ bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1});
*code = bstr_cut(*code, 1);
return true;
}
@@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str)
char c = bstrtoll(num, &num, 16);
if (!num.len)
return false;
- *str = talloc_strndup_append_buffer(*str, &c, 1);
+ bstr_xappend(talloc_ctx, dst, (bstr){&c, 1});
*code = bstr_cut(*code, 3);
return true;
}
@@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str)
int c = bstrtoll(num, &num, 16);
if (num.len)
return false;
- *str = mp_append_utf8_buffer(*str, c);
+ mp_append_utf8_bstr(talloc_ctx, dst, c);
*code = bstr_cut(*code, 5);
return true;
}
return false;
}
+
+// Variant of mp_append_escaped_string that skips the allocation when it can:
+// if dst->start is NULL on entry and no escape sequence has to be parsed,
+// *dst is set to the parsed slice of *src instead of a copy.
+bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src)
+{
+    bstr rest = *src;
+    int pos = 0;
+    for (;;) {
+        if (pos < rest.len && rest.start[pos] == '\\') {
+            // Flush the literal run before the backslash, then decode the escape.
+            bstr_xappend(talloc_ctx, dst, bstr_splice(rest, 0, pos));
+            rest = bstr_cut(rest, pos + 1);
+            pos = 0;
+            if (!mp_parse_escape(talloc_ctx, dst, &rest))
+                return false;
+            continue;
+        }
+        if (pos >= rest.len || rest.start[pos] == '"') {
+            // Closing quote or end of input: emit the remaining literal run.
+            *src = bstr_cut(rest, pos);
+            bstr literal = bstr_splice(rest, 0, pos);
+            if (dst->start)
+                bstr_xappend(talloc_ctx, dst, literal);
+            else
+                *dst = literal;
+            return true;
+        }
+        pos++;
+    }
+}
+
+// src is expected to point to a C-style string literal, *src pointing to the
+// first char after the starting '"'. It will append the contents of the literal
+// to *dst (using talloc_ctx) until the first '"' or the end of *src is found.
+// See bstr_xappend() how data is appended to *dst.
+// On success, *src will either start with '"', or be empty.
+// On error, return false, and *dst will contain the string until the first
+// error, *src is not changed.
+// Note that dst->start will be implicitly \0-terminated on successful return,
+// and if it was NULL or \0-terminated before calling the function.
+// As mentioned above, the caller is responsible for skipping the '"' chars.
+bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src)
+{
+    void *src_start = src->start; // the call below advances *src on success
+    if (!mp_append_escaped_string_noalloc(talloc_ctx, dst, src))
+        return false;
+    // Guarantee copy (or allocation): *dst may still be NULL or alias the input.
+    if (!dst->start || dst->start == src_start) {
+        bstr res = *dst;
+        *dst = (bstr){0};
+        bstr_xappend(talloc_ctx, dst, res);
+    }
+    return true;
+}
diff --git a/common/common.h b/common/common.h
index ae2fb8f2d5..7ae18d1b6f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2);
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
struct bstr;
-bool mp_parse_escape(struct bstr *code, char **str);
+// Append codepoint to *buf via bstr_xappend(); bstr counterpart of mp_append_utf8_buffer().
+void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint);
+// Unescape a C-style string body from *src into *dst; the _noalloc variant may set *dst to a slice of *src.
+bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst,
+ struct bstr *src);
+bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst,
+ struct bstr *src);
#endif /* MPLAYER_MPCOMMON_H */
diff --git a/input/cmd_parse.c b/input/cmd_parse.c
index c9a70035fc..2369ff5a8e 100644
--- a/input/cmd_parse.c
+++ b/input/cmd_parse.c
@@ -41,31 +41,6 @@ static bool read_token(bstr str, bstr *out_rest, bstr *out_token)
return true;
}
-static bool read_escaped_string(void *talloc_ctx, bstr *str, bstr *literal)
-{
- bstr t = *str;
- char *new = talloc_strdup(talloc_ctx, "");
- while (t.len) {
- if (t.start[0] == '"')
- break;
- if (t.start[0] == '\\') {
- t = bstr_cut(t, 1);
- if (!mp_parse_escape(&t, &new))
- goto error;
- } else {
- new = talloc_strndup_append_buffer(new, t.start, 1);
- t = bstr_cut(t, 1);
- }
- }
- int len = str->len - t.len;
- *literal = new ? bstr0(new) : bstr_splice(*str, 0, len);
- *str = bstr_cut(*str, len);
- return true;
-error:
- talloc_free(new);
- return false;
-}
-
// Somewhat awkward; the main purpose is supporting both strings and
// pre-split string arrays as input.
struct parse_ctx {
@@ -92,7 +67,7 @@ static int pctx_read_token(struct parse_ctx *ctx, bstr *out)
ctx->str = bstr_lstrip(ctx->str);
bstr start = ctx->str;
if (bstr_eatstart0(&ctx->str, "\"")) {
- if (!read_escaped_string(ctx->tmp, &ctx->str, out)) {
+ if (!mp_append_escaped_string_noalloc(ctx->tmp, out, &ctx->str)) {
MP_ERR(ctx, "Broken string escapes: ...>%.*s<.\n", BSTR_P(start));
return -1;
}
diff --git a/options/m_option.c b/options/m_option.c
index 9f98008c64..b6cfbdf0d0 100644
--- a/options/m_option.c
+++ b/options/m_option.c
@@ -746,20 +746,17 @@ const m_option_type_t m_option_type_float = {
static char *unescape_string(void *talloc_ctx, bstr str)
{
- char *res = talloc_strdup(talloc_ctx, "");
+ bstr dst = {0}; // output buffer; start is NULL until something is appended
while (str.len) {
- bstr rest;
- bool esc = bstr_split_tok(str, "\\", &str, &rest);
- res = talloc_strndup_append_buffer(res, str.start, str.len);
- if (esc) {
- if (!mp_parse_escape(&rest, &res)) {
- talloc_free(res);
- return NULL;
- }
+ if (!mp_append_escaped_string(talloc_ctx, &dst, &str)) { // unescape up to the next '"' or the end of str
+ talloc_free(dst.start); // broken escape: drop the partial output
+ return NULL;
}
- str = rest;
+ if (!bstr_eatstart0(&str, "\"")) // presumably false when str does not start with '"', i.e. input is used up
+ break;
+ bstr_xappend(talloc_ctx, &dst, bstr0("\"")); // '"' is not a terminator here, so keep it literally
}
- return res;
+ return dst.start; // NULL if the loop never ran (empty str)
}
static char *escape_string(char *str0)