From 066ecfcbfb0b7120183338c5382e98c609a9d89a Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 30 Dec 2013 20:28:32 +0100 Subject: common: simplify and optimize string escape parsing This code is shared between input.conf parser and option parser. Until now, the performance didn't really matter. But I want to use this code for JSON parsing too, and since JSON will have to be parsed a lot, it should probably try to avoid realloc'ing too much. This commit moves parsing of C-style escaped strings into a common function, and allows using it in a way realloc can be completely avoided, if the already allocated buffer is large enough. --- common/common.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- common/common.h | 8 ++++++- 2 files changed, 76 insertions(+), 5 deletions(-) (limited to 'common') diff --git a/common/common.c b/common/common.c index 365a369425..741dc236b9 100644 --- a/common/common.c +++ b/common/common.c @@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint) return talloc_strndup_append_buffer(buffer, data, output - data); } +// Like mp_append_utf8_buffer, but use bstr_xappend(). +void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint) +{ + char data[8]; + uint8_t tmp; + char *output = data; + PUT_UTF8(codepoint, tmp, *output++ = tmp;); + bstr_xappend(talloc_ctx, buf, (bstr){data, output - data}); +} + // Parse a C-style escape beginning at code, and append the result to *str // using talloc. The input string (*code) must point to the first character // after the initial '\', and after parsing *code is set to the first character // after the current escape. // On error, false is returned, and all input remains unchanged. -bool mp_parse_escape(bstr *code, char **str) +static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code) { if (code->len < 1) return false; @@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str) case '\'': replace = '\''; break; } if (replace) { - *str = talloc_strndup_append_buffer(*str, &replace, 1); + bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1}); *code = bstr_cut(*code, 1); return true; } @@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str) char c = bstrtoll(num, &num, 16); if (!num.len) return false; - *str = talloc_strndup_append_buffer(*str, &c, 1); + bstr_xappend(talloc_ctx, dst, (bstr){&c, 1}); *code = bstr_cut(*code, 3); return true; } @@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str) int c = bstrtoll(num, &num, 16); if (num.len) return false; - *str = mp_append_utf8_buffer(*str, c); + mp_append_utf8_bstr(talloc_ctx, dst, c); *code = bstr_cut(*code, 5); return true; } return false; } + +// Like mp_append_escaped_string, but set *dst to sliced *src if no escape +// sequences have to be parsed (i.e. no memory allocation is required), and +// if dst->start was NULL on function entry. +bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src) +{ + bstr t = *src; + int cur = 0; + while (1) { + if (cur >= t.len || t.start[cur] == '"') { + *src = bstr_cut(t, cur); + t = bstr_splice(t, 0, cur); + if (dst->start == NULL) { + *dst = t; + } else { + bstr_xappend(talloc_ctx, dst, t); + } + return true; + } else if (t.start[cur] == '\\') { + bstr_xappend(talloc_ctx, dst, bstr_splice(t, 0, cur)); + t = bstr_cut(t, cur + 1); + cur = 0; + if (!mp_parse_escape(talloc_ctx, dst, &t)) + goto error; + } else { + cur++; + } + } +error: + return false; +} + +// src is expected to point to a C-style string literal, *src pointing to the +// first char after the starting '"'. It will append the contents of the literal +// to *dst (using talloc_ctx) until the first '"' or the end of *str is found. +// See bstr_xappend() how data is appended to *dst. +// On success, *src will either start with '"', or be empty. +// On error, return false, and *dst will contain the string until the first +// error, *src is not changed. +// Note that dst->start will be implicitly \0-terminated on successful return, +// and if it was NULL or \0-terminated before calling the function. +// As mentioned above, the caller is responsible for skipping the '"' chars. +bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src) +{ + if (mp_append_escaped_string_noalloc(talloc_ctx, dst, src)) { + // Guarantee copy (or allocation). + if (!dst->start || dst->start == src->start) { + bstr res = *dst; + *dst = (bstr){0}; + bstr_xappend(talloc_ctx, dst, res); + } + return true; + } + return false; +} diff --git a/common/common.h b/common/common.h index ae2fb8f2d5..7ae18d1b6f 100644 --- a/common/common.h +++ b/common/common.h @@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2); char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint); struct bstr; -bool mp_parse_escape(struct bstr *code, char **str); + +void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint); + +bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst, + struct bstr *src); +bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst, + struct bstr *src); #endif /* MPLAYER_MPCOMMON_H */ -- cgit v1.2.3