From 68ff8a0484b592a629ef2bbcb0537265ae36d1d0 Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 29 Aug 2014 12:09:04 +0200 Subject: Move compat/ and bstr/ directory contents somewhere else bstr.c doesn't really deserve its own directory, and compat had just a few files, most of which may as well be in osdep. There isn't really any justification for these extra directories, so get rid of them. The compat/libav.h was empty - just delete it. We changed our approach to API compatibility, and will likely not need it anymore. --- misc/bstr.c | 437 ++++++++++++++++++++++++++++++++++++++++++++++++++++ misc/bstr.h | 214 +++++++++++++++++++++++++ misc/charset_conv.h | 2 +- misc/ring.c | 2 +- 4 files changed, 653 insertions(+), 2 deletions(-) create mode 100644 misc/bstr.c create mode 100644 misc/bstr.h (limited to 'misc') diff --git a/misc/bstr.c b/misc/bstr.c new file mode 100644 index 0000000000..a6268b4d53 --- /dev/null +++ b/misc/bstr.c @@ -0,0 +1,437 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdlib.h> + +#include <libavutil/common.h> + +#include "talloc.h" + +#include "common/common.h" +#include "misc/ctype.h" +#include "bstr.h" + +int bstrcmp(struct bstr str1, struct bstr str2) +{ + int ret = memcmp(str1.start, str2.start, FFMIN(str1.len, str2.len)); + + if (!ret) { + if (str1.len == str2.len) + return 0; + else if (str1.len > str2.len) + return 1; + else + return -1; + } + return ret; +} + +int bstrcasecmp(struct bstr str1, struct bstr str2) +{ + int ret = strncasecmp(str1.start, str2.start, FFMIN(str1.len, str2.len)); + + if (!ret) { + if (str1.len == str2.len) + return 0; + else if (str1.len > str2.len) + return 1; + else + return -1; + } + return ret; +} + +int bstrchr(struct bstr str, int c) +{ + for (int i = 0; i < str.len; i++) + if (str.start[i] == c) + return i; + return -1; +} + +int bstrrchr(struct bstr str, int c) +{ + for (int i = str.len - 1; i >= 0; i--) + if (str.start[i] == c) + return i; + return -1; +} + +int bstrcspn(struct bstr str, const char *reject) +{ + int i; + for (i = 0; i < str.len; i++) + if (strchr(reject, str.start[i])) + break; + return i; +} + +int bstrspn(struct bstr str, const char *accept) +{ + int i; + for (i = 0; i < str.len; i++) + if (!strchr(accept, str.start[i])) + break; + return i; +} + +int bstr_find(struct bstr haystack, struct bstr needle) +{ + for (int i = 0; i < haystack.len; i++) + if (bstr_startswith(bstr_splice(haystack, i, haystack.len), needle)) + return i; + return -1; +} + +struct bstr bstr_lstrip(struct bstr str) +{ + while (str.len && mp_isspace(*str.start)) { + str.start++; + str.len--; + } + return str; +} + +struct bstr bstr_strip(struct bstr str) +{ + str = bstr_lstrip(str); + while (str.len && mp_isspace(str.start[str.len - 1])) + str.len--; + return str; +} + +struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest) +{ + int start; + for (start = 0; start < str.len; start++) + if (!strchr(sep, str.start[start])) + break; + str = 
bstr_cut(str, start); + int end = bstrcspn(str, sep); + if (rest) { + *rest = bstr_cut(str, end); + } + return bstr_splice(str, 0, end); +} + +// Unlike with bstr_split(), tok is a string, and not a set of char. +// If tok is in str, return true, and: concat(out_left, tok, out_right) == str +// Otherwise, return false, and set out_left==str, out_right=="" +bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right) +{ + bstr bsep = bstr0(tok); + int pos = bstr_find(str, bsep); + if (pos < 0) + pos = str.len; + *out_left = bstr_splice(str, 0, pos); + *out_right = bstr_cut(str, pos + bsep.len); + return pos != str.len; +} + +struct bstr bstr_splice(struct bstr str, int start, int end) +{ + if (start < 0) + start += str.len; + if (end < 0) + end += str.len; + end = FFMIN(end, str.len); + start = FFMAX(start, 0); + end = FFMAX(end, start); + str.start += start; + str.len = end - start; + return str; +} + +long long bstrtoll(struct bstr str, struct bstr *rest, int base) +{ + str = bstr_lstrip(str); + char buf[51]; + int len = FFMIN(str.len, 50); + memcpy(buf, str.start, len); + buf[len] = 0; + char *endptr; + long long r = strtoll(buf, &endptr, base); + if (rest) + *rest = bstr_cut(str, endptr - buf); + return r; +} + +double bstrtod(struct bstr str, struct bstr *rest) +{ + str = bstr_lstrip(str); + char buf[101]; + int len = FFMIN(str.len, 100); + memcpy(buf, str.start, len); + buf[len] = 0; + char *endptr; + double r = strtod(buf, &endptr); + if (rest) + *rest = bstr_cut(str, endptr - buf); + return r; +} + +struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str) +{ + if (str.len == 0) + return NULL; + int count = 0; + for (int i = 0; i < str.len; i++) + if (str.start[i] == '\n') + count++; + if (str.start[str.len - 1] != '\n') + count++; + struct bstr *r = talloc_array_ptrtype(talloc_ctx, r, count); + unsigned char *p = str.start; + for (int i = 0; i < count - 1; i++) { + r[i].start = p; + while (*p++ != '\n'); + r[i].len = p - r[i].start; 
+ } + r[count - 1].start = p; + r[count - 1].len = str.start + str.len - p; + return r; +} + +struct bstr bstr_getline(struct bstr str, struct bstr *rest) +{ + int pos = bstrchr(str, '\n'); + if (pos < 0) + pos = str.len; + if (rest) + *rest = bstr_cut(str, pos + 1); + return bstr_splice(str, 0, pos + 1); +} + +struct bstr bstr_strip_linebreaks(struct bstr str) +{ + if (bstr_endswith0(str, "\r\n")) { + str = bstr_splice(str, 0, str.len - 2); + } else if (bstr_endswith0(str, "\n")) { + str = bstr_splice(str, 0, str.len - 1); + } + return str; +} + +bool bstr_eatstart(struct bstr *s, struct bstr prefix) +{ + if (!bstr_startswith(*s, prefix)) + return false; + *s = bstr_cut(*s, prefix.len); + return true; +} + +void bstr_lower(struct bstr str) +{ + for (int i = 0; i < str.len; i++) + str.start[i] = mp_tolower(str.start[i]); +} + +int bstr_sscanf(struct bstr str, const char *format, ...) +{ + char *ptr = bstrdup0(NULL, str); + va_list va; + va_start(va, format); + int ret = vsscanf(ptr, format, va); + va_end(va); + talloc_free(ptr); + return ret; +} + +int bstr_parse_utf8_code_length(unsigned char b) +{ + if (b < 128) + return 1; + int bytes = 7 - av_log2(b ^ 255); + return (bytes >= 2 && bytes <= 4) ? bytes : -1; +} + +int bstr_decode_utf8(struct bstr s, struct bstr *out_next) +{ + if (s.len == 0) + return -1; + unsigned int codepoint = s.start[0]; + s.start++; s.len--; + if (codepoint >= 128) { + int bytes = bstr_parse_utf8_code_length(codepoint); + if (bytes < 0 || s.len < bytes - 1) + return -1; + codepoint &= 127 >> bytes; + for (int n = 1; n < bytes; n++) { + int tmp = (unsigned char)s.start[0]; + if ((tmp & 0xC0) != 0x80) + return -1; + codepoint = (codepoint << 6) | (tmp & ~0xC0); + s.start++; s.len--; + } + if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) + return -1; + // Overlong sequences - check taken from libavcodec. + // (The only reason we even bother with this is to make libavcodec's + // retarded subtitle utf-8 check happy.) 
+ unsigned int min = bytes == 2 ? 0x80 : 1 << (5 * bytes - 4); + if (codepoint < min) + return -1; + } + if (out_next) + *out_next = s; + return codepoint; +} + +struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next) +{ + bstr rest; + int code = bstr_decode_utf8(str, &rest); + if (code < 0) + return (bstr){0}; + if (out_next) + *out_next = rest; + return bstr_splice(str, 0, str.len - rest.len); +} + +int bstr_validate_utf8(struct bstr s) +{ + while (s.len) { + if (bstr_decode_utf8(s, &s) < 0) { + // Try to guess whether the sequence was just cut-off. + unsigned int codepoint = (unsigned char)s.start[0]; + int bytes = bstr_parse_utf8_code_length(codepoint); + if (bytes > 1 && s.len < 6) { + // Manually check validity of left bytes + for (int n = 1; n < bytes; n++) { + if (n >= s.len) { + // Everything valid until now - just cut off. + return -(bytes - s.len); + } + int tmp = (unsigned char)s.start[n]; + if ((tmp & 0xC0) != 0x80) + break; + } + } + return -8; + } + } + return 0; +} + +struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s) +{ + bstr new = {0}; + bstr left = s; + unsigned char *first_ok = s.start; + while (left.len) { + int r = bstr_decode_utf8(left, &left); + if (r < 0) { + bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok}); + mp_append_utf8_bstr(talloc_ctx, &new, (unsigned char)left.start[0]); + left.start += 1; + left.len -= 1; + first_ok = left.start; + } + } + if (!new.start) + return s; + if (first_ok != left.start) + bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok}); + return new; +} + +static void resize_append(void *talloc_ctx, bstr *s, size_t append_min) +{ + size_t size = talloc_get_size(s->start); + assert(s->len <= size); + if (append_min > size - s->len) { + if (append_min < size) + append_min = size; // preallocate in power of 2s + if (size >= SIZE_MAX / 2 || append_min >= SIZE_MAX / 2) + abort(); // oom + s->start = talloc_realloc_size(talloc_ctx, s->start, size + 
append_min); + } +} + +// Append the string, so that *s = *s + append. s->start is expected to be +// a talloc allocation (which can be realloced) or NULL. +// This function will always implicitly append a \0 after the new string for +// convenience. +// talloc_ctx will be used as parent context, if s->start is NULL. +void bstr_xappend(void *talloc_ctx, bstr *s, bstr append) +{ + resize_append(talloc_ctx, s, append.len + 1); + memcpy(s->start + s->len, append.start, append.len); + s->len += append.len; + s->start[s->len] = '\0'; +} + +void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + bstr_xappend_vasprintf(talloc_ctx, s, fmt, ap); + va_end(ap); +} + +// Exactly as bstr_xappend(), but with a formatted string. +void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, + va_list ap) +{ + int size; + va_list copy; + va_copy(copy, ap); + char c; + size = vsnprintf(&c, 1, fmt, copy); + va_end(copy); + + if (size < 0) + abort(); + + resize_append(talloc_ctx, s, size + 1); + vsnprintf(s->start + s->len, size + 1, fmt, ap); + s->len += size; +} + +bool bstr_case_startswith(struct bstr s, struct bstr prefix) +{ + struct bstr start = bstr_splice(s, 0, prefix.len); + return start.len == prefix.len && bstrcasecmp(start, prefix) == 0; +} + +bool bstr_case_endswith(struct bstr s, struct bstr suffix) +{ + struct bstr end = bstr_cut(s, -suffix.len); + return end.len == suffix.len && bstrcasecmp(end, suffix) == 0; +} + +struct bstr bstr_strip_ext(struct bstr str) +{ + int dotpos = bstrrchr(str, '.'); + if (dotpos < 0) + return str; + return (struct bstr){str.start, dotpos}; +} + +struct bstr bstr_get_ext(struct bstr s) +{ + int dotpos = bstrrchr(s, '.'); + if (dotpos < 0) + return (struct bstr){NULL, 0}; + return bstr_splice(s, dotpos + 1, s.len); +} diff --git a/misc/bstr.h b/misc/bstr.h new file mode 100644 index 0000000000..a1e99dd4a5 --- /dev/null +++ b/misc/bstr.h @@ -0,0 +1,214 @@ +/* + * This file 
is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_BSTR_H +#define MPLAYER_BSTR_H + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <stdarg.h> + +#include "talloc.h" +#include "osdep/compiler.h" + +/* NOTE: 'len' is size_t, but most string-handling functions below assume + * that input size has been sanity checked and len fits in an int. + */ +typedef struct bstr { + unsigned char *start; + size_t len; +} bstr; + +// If str.start is NULL, return NULL. +static inline char *bstrdup0(void *talloc_ctx, struct bstr str) +{ + return talloc_strndup(talloc_ctx, (char *)str.start, str.len); +} + +// Like bstrdup0(), but always return a valid C-string. +static inline char *bstrto0(void *talloc_ctx, struct bstr str) +{ + return str.start ? bstrdup0(talloc_ctx, str) : talloc_strdup(talloc_ctx, ""); +} + +// Return start = NULL iff that is true for the original. +static inline struct bstr bstrdup(void *talloc_ctx, struct bstr str) +{ + struct bstr r = { NULL, str.len }; + if (str.start) + r.start = (unsigned char *)talloc_memdup(talloc_ctx, str.start, str.len); + return r; +} + +static inline struct bstr bstr0(const char *s) +{ + return (struct bstr){(unsigned char *)s, s ? 
strlen(s) : 0}; +} + +int bstrcmp(struct bstr str1, struct bstr str2); +int bstrcasecmp(struct bstr str1, struct bstr str2); +int bstrchr(struct bstr str, int c); +int bstrrchr(struct bstr str, int c); +int bstrspn(struct bstr str, const char *accept); +int bstrcspn(struct bstr str, const char *reject); + +int bstr_find(struct bstr haystack, struct bstr needle); +struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str); +struct bstr bstr_lstrip(struct bstr str); +struct bstr bstr_strip(struct bstr str); +struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest); +bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right); +struct bstr bstr_splice(struct bstr str, int start, int end); +long long bstrtoll(struct bstr str, struct bstr *rest, int base); +double bstrtod(struct bstr str, struct bstr *rest); +void bstr_lower(struct bstr str); +int bstr_sscanf(struct bstr str, const char *format, ...); + +// Decode the UTF-8 code point at the start of the string, and return the +// character. +// After calling this function, *out_next will point to the next character. +// out_next can be NULL. +// On error, -1 is returned, and *out_next is not modified. +int bstr_decode_utf8(struct bstr str, struct bstr *out_next); + +// Return the UTF-8 code point at the start of the string. +// After calling this function, *out_next will point to the next character. +// out_next can be NULL. +// On error, an empty string is returned, and *out_next is not modified. +struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next); + +// Return the length of the UTF-8 sequence that starts with the given byte. +// Given a string char *s, the next UTF-8 code point is to be expected at +// s + bstr_parse_utf8_code_length(s[0]) +// On error, -1 is returned. On success, it returns a value in the range [1, 4]. +int bstr_parse_utf8_code_length(unsigned char b); + +// Return >= 0 if the string is valid UTF-8, otherwise negative error code. 
+// Embedded \0 bytes are considered valid. +// This returns -N if the UTF-8 string was likely just cut off in the middle of +// a UTF-8 sequence: -1 means 1 byte was missing, -5 means 5 bytes were missing. +// If the string was likely not cut off, -8 is returned. +// Use (return_value > -8) to check whether the string is valid UTF-8 or valid +// but cut-off UTF-8. +int bstr_validate_utf8(struct bstr s); + +// Force the input string to valid UTF-8. If invalid UTF-8 encoding is +// encountered, the invalid bytes are interpreted as Latin-1. +// Embedded \0 bytes are considered valid. +// If replacement happens, a newly allocated string is returned (with a \0 +// byte added past its end for convenience). The string is allocated via +// talloc, with talloc_ctx as parent. +struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s); + +// Return the text up to and including the next line break. Change *rest to +// point to the text following this line break. (rest can be NULL.) +// Line break characters are not stripped. +struct bstr bstr_getline(struct bstr str, struct bstr *rest); + +// Strip one trailing line break. This is intended for use with bstr_getline, +// and will remove the trailing \n or \r\n sequence. +struct bstr bstr_strip_linebreaks(struct bstr str); + +void bstr_xappend(void *talloc_ctx, bstr *s, bstr append); +void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...) + PRINTF_ATTRIBUTE(3, 4); +void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list va) + PRINTF_ATTRIBUTE(3, 0); + +// If s starts with prefix, return true and return the rest of the string in s. 
+bool bstr_eatstart(struct bstr *s, struct bstr prefix); + +bool bstr_case_startswith(struct bstr s, struct bstr prefix); +bool bstr_case_endswith(struct bstr s, struct bstr suffix); +struct bstr bstr_strip_ext(struct bstr str); +struct bstr bstr_get_ext(struct bstr s); + +static inline struct bstr bstr_cut(struct bstr str, int n) +{ + if (n < 0) { + n += str.len; + if (n < 0) + n = 0; + } + if (((size_t)n) > str.len) + n = str.len; + return (struct bstr){str.start + n, str.len - n}; +} + +static inline bool bstr_startswith(struct bstr str, struct bstr prefix) +{ + if (str.len < prefix.len) + return false; + return !memcmp(str.start, prefix.start, prefix.len); +} + +static inline bool bstr_startswith0(struct bstr str, const char *prefix) +{ + return bstr_startswith(str, bstr0(prefix)); +} + +static inline bool bstr_endswith(struct bstr str, struct bstr suffix) +{ + if (str.len < suffix.len) + return false; + return !memcmp(str.start + str.len - suffix.len, suffix.start, suffix.len); +} + +static inline bool bstr_endswith0(struct bstr str, const char *suffix) +{ + return bstr_endswith(str, bstr0(suffix)); +} + +static inline int bstrcmp0(struct bstr str1, const char *str2) +{ + return bstrcmp(str1, bstr0(str2)); +} + +static inline bool bstr_equals(struct bstr str1, struct bstr str2) +{ + return bstrcmp(str1, str2) == 0; +} + +static inline bool bstr_equals0(struct bstr str1, const char *str2) +{ + return bstrcmp(str1, bstr0(str2)) == 0; +} + +static inline int bstrcasecmp0(struct bstr str1, const char *str2) +{ + return bstrcasecmp(str1, bstr0(str2)); +} + +static inline int bstr_find0(struct bstr haystack, const char *needle) +{ + return bstr_find(haystack, bstr0(needle)); +} + +static inline int bstr_eatstart0(struct bstr *s, const char *prefix) +{ + return bstr_eatstart(s, bstr0(prefix)); +} + +// create a pair (not single value!) 
for "%.*s" printf syntax +#define BSTR_P(bstr) (int)((bstr).len), (bstr).start + +#define WHITESPACE " \f\n\r\t\v" + +#endif /* MPLAYER_BSTR_H */ diff --git a/misc/charset_conv.h b/misc/charset_conv.h index e9efa48444..93bd91cffe 100644 --- a/misc/charset_conv.h +++ b/misc/charset_conv.h @@ -2,7 +2,7 @@ #define MP_CHARSET_CONV_H #include <stdbool.h> -#include "bstr/bstr.h" +#include "misc/bstr.h" struct mp_log; diff --git a/misc/ring.c b/misc/ring.c index 804e6330b0..41c9c6a99e 100644 --- a/misc/ring.c +++ b/misc/ring.c @@ -21,7 +21,7 @@ #include <libavutil/common.h> #include <assert.h> #include "talloc.h" -#include "compat/atomics.h" +#include "osdep/atomics.h" #include "ring.h" struct mp_ring { -- cgit v1.2.3