From 68ff8a0484b592a629ef2bbcb0537265ae36d1d0 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Fri, 29 Aug 2014 12:09:04 +0200
Subject: Move compat/ and bstr/ directory contents somewhere else

bstr.c doesn't really deserve its own directory, and compat had just
a few files, most of which may as well be in osdep. There isn't really
any justification for these extra directories, so get rid of them.

The compat/libav.h was empty - just delete it. We changed our approach
to API compatibility, and will likely not need it anymore.
---
 misc/bstr.c         | 437 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 misc/bstr.h         | 214 +++++++++++++++++++++++++
 misc/charset_conv.h |   2 +-
 misc/ring.c         |   2 +-
 4 files changed, 653 insertions(+), 2 deletions(-)
 create mode 100644 misc/bstr.c
 create mode 100644 misc/bstr.h

(limited to 'misc')

diff --git a/misc/bstr.c b/misc/bstr.c
new file mode 100644
index 0000000000..a6268b4d53
--- /dev/null
+++ b/misc/bstr.c
@@ -0,0 +1,437 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <libavutil/common.h>
+
+#include "talloc.h"
+
+#include "common/common.h"
+#include "misc/ctype.h"
+#include "bstr.h"
+
+// Compare two strings bytewise, like memcmp() on their common prefix; if the
+// prefixes are equal, the shorter string sorts first.
+// Returns 0 if equal, otherwise a negative or positive value.
+int bstrcmp(struct bstr str1, struct bstr str2)
+{
+    size_t common = FFMIN(str1.len, str2.len);
+    // An empty bstr may have start == NULL, which must not reach memcmp().
+    int ret = common ? memcmp(str1.start, str2.start, common) : 0;
+    if (ret)
+        return ret;
+    if (str1.len == str2.len)
+        return 0;
+    return str1.len > str2.len ? 1 : -1;
+}
+
+// Case-insensitive variant of bstrcmp(): the common prefix is compared with
+// strncasecmp(); if it is equal, the shorter string sorts first.
+// NOTE: strncasecmp() stops at a \0 byte, so bytes after an embedded \0 in
+// the common prefix are not compared.
+int bstrcasecmp(struct bstr str1, struct bstr str2)
+{
+    size_t common = FFMIN(str1.len, str2.len);
+    // An empty bstr may have start == NULL; don't pass that to libc.
+    int ret = common
+        ? strncasecmp((char *)str1.start, (char *)str2.start, common) : 0;
+    if (ret)
+        return ret;
+    return str1.len == str2.len ? 0 : (str1.len > str2.len ? 1 : -1);
+}
+
+// Return the index of the first byte equal to (unsigned char)c, or -1.
+// (Converting c lets a negative plain char, i.e. a byte >= 0x80, match.)
+int bstrchr(struct bstr str, int c)
+{
+    unsigned char *p = str.len ? memchr(str.start, c, str.len) : NULL;
+    return p ? (int)(p - str.start) : -1;
+}
+
+// Index of the last byte equal to (unsigned char)c, or -1 if not found.
+int bstrrchr(struct bstr str, int c)
+{
+    int i = str.len; // scan backwards; ends at -1 if nothing matched
+    while (i-- > 0 && str.start[i] != (unsigned char)c) {}
+    return i;
+}
+
+// Length of the leading run of bytes not in reject (a \0 byte also stops it).
+int bstrcspn(struct bstr str, const char *reject)
+{
+    int n = 0;
+    while (n < str.len && !strchr(reject, str.start[n]))
+        n++;
+    return n;
+}
+
+// Length of the leading run of bytes found in accept (\0 bytes count too).
+int bstrspn(struct bstr str, const char *accept)
+{
+    int n = 0;
+    while (n < str.len && strchr(accept, str.start[n]))
+        n++;
+    return n;
+}
+
+int bstr_find(struct bstr haystack, struct bstr needle)
+{
+    for (int pos = 0; pos < haystack.len; pos++) // try every start offset
+        if (bstr_startswith(bstr_cut(haystack, pos), needle))
+            return pos;
+    return -1; // needle does not occur (always so for an empty haystack)
+}
+
+// Drop leading whitespace (as classified by mp_isspace()) from str.
+// Only the returned view changes; the underlying bytes are untouched.
+struct bstr bstr_lstrip(struct bstr str)
+{
+    for (; str.len && mp_isspace(*str.start); str.len--)
+        str.start++;
+    return str;
+}
+
+// Return str without leading and trailing whitespace (per mp_isspace()).
+struct bstr bstr_strip(struct bstr str)
+{
+    struct bstr s = bstr_lstrip(str);
+    for (; s.len && mp_isspace(s.start[s.len - 1]); s.len--) {}
+    return s;
+}
+
+// Split off the first token of str, where sep is a set of separator bytes.
+// Leading separators are skipped; the token runs up to the next separator.
+// If rest is not NULL, *rest is set to what follows the token (it still
+// starts with the separator that ended the token, if any).
+// Returns the token, which is empty if str holds only separators.
+struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest)
+{
+    struct bstr body = bstr_cut(str, bstrspn(str, sep));
+    int tok_len = bstrcspn(body, sep);
+    if (rest)
+        *rest = bstr_cut(body, tok_len);
+    return bstr_splice(body, 0, tok_len);
+}
+
+// Split str at the first occurrence of the string tok (a whole string, not
+// a set of characters as in bstr_split()).
+// Found: returns true, with concat(*out_left, tok, *out_right) == str.
+// Not found: returns false, with *out_left == str and *out_right == "".
+bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right)
+{
+    bstr needle = bstr0(tok);
+    int hit = bstr_find(str, needle);
+    int at = hit < 0 ? (int)str.len : hit;
+    *out_left = bstr_splice(str, 0, at);
+    *out_right = bstr_cut(str, at + needle.len);
+    return hit >= 0;
+}
+
+// Return the bytes of str in [start, end). Negative indexes count from the
+// end; out-of-range values are clamped, end < start gives an empty string.
+struct bstr bstr_splice(struct bstr str, int start, int end)
+{
+    int len = str.len; // signed on purpose: avoid unsigned compares below
+    if (start < 0)
+        start += len;
+    if (end < 0)
+        end += len;
+    start = FFMIN(FFMAX(start, 0), len);
+    end = FFMIN(FFMAX(end, start), len);
+    return (struct bstr){str.start + start, end - start};
+}
+
+long long bstrtoll(struct bstr str, struct bstr *rest, int base)
+{
+    struct bstr in = bstr_lstrip(str);
+    char tmp[51]; // strtoll() needs a \0-terminated copy; 50 bytes examined
+    int n = FFMIN(in.len, 50);
+    memcpy(tmp, in.start, n);
+    tmp[n] = '\0';
+    char *stop;
+    long long value = strtoll(tmp, &stop, base);
+    if (rest)
+        *rest = bstr_cut(in, stop - tmp); // text after the parsed number
+    return value;
+}
+
+double bstrtod(struct bstr str, struct bstr *rest)
+{
+    struct bstr in = bstr_lstrip(str);
+    char tmp[101]; // strtod() needs a \0-terminated copy; 100 bytes examined
+    int n = FFMIN(in.len, 100);
+    memcpy(tmp, in.start, n);
+    tmp[n] = '\0';
+    char *stop;
+    double value = strtod(tmp, &stop);
+    if (rest)
+        *rest = bstr_cut(in, stop - tmp); // text after the parsed number
+    return value;
+}
+
+// Split str into lines; each returned bstr keeps its trailing '\n' (a final
+// line without '\n' is returned as-is). The array is allocated with
+// talloc_ctx as parent. Returns NULL for an empty str.
+struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str)
+{
+    if (!str.len)
+        return NULL;
+    // One line per '\n', plus one for an unterminated last line.
+    int count = str.start[str.len - 1] != '\n';
+    for (int i = 0; i < str.len; i++)
+        count += str.start[i] == '\n';
+    struct bstr *r = talloc_array_ptrtype(talloc_ctx, r, count);
+    unsigned char *p = str.start;
+    for (int i = 0; i < count - 1; i++) {
+        unsigned char *line = p;
+        while (*p++ != '\n') {}
+        r[i] = (struct bstr){line, p - line};
+    }
+    r[count - 1] = (struct bstr){p, str.start + str.len - p};
+    return r;
+}
+
+// Return the first line of str, '\n' included; *rest (if set) = remainder.
+struct bstr bstr_getline(struct bstr str, struct bstr *rest)
+{
+    int nl = bstrchr(str, '\n');
+    int line_len = (nl < 0 ? (int)str.len : nl) + 1; // clamped by the callees
+    if (rest)
+        *rest = bstr_cut(str, line_len);
+    return bstr_splice(str, 0, line_len);
+}
+
+// Strip one trailing line break from str: "\r\n" if present, else "\n".
+// Any other string is returned unchanged.
+struct bstr bstr_strip_linebreaks(struct bstr str)
+{
+    // Number of line break bytes at the end of str (0 if there is none).
+    int drop = bstr_endswith0(str, "\r\n") ? 2
+             : bstr_endswith0(str, "\n") ? 1 : 0;
+    return bstr_splice(str, 0, str.len - drop);
+}
+
+bool bstr_eatstart(struct bstr *s, struct bstr prefix)
+{
+    bool match = bstr_startswith(*s, prefix);
+    if (match)
+        *s = bstr_cut(*s, prefix.len); // consume the prefix
+    return match; // *s is left untouched when there is no match
+}
+
+void bstr_lower(struct bstr str)
+{
+    for (size_t n = 0; n < str.len; n++) // lower-cases the bytes in place
+        str.start[n] = mp_tolower(str.start[n]);
+}
+
+int bstr_sscanf(struct bstr str, const char *format, ...)
+{
+    char *ptr = bstrto0(NULL, str); // C-string copy; "" if start is NULL
+    va_list va;
+    va_start(va, format);
+    int ret = vsscanf(ptr, format, va);
+    va_end(va);
+    talloc_free(ptr);
+    return ret;
+}
+
+// Length (1-4) of the UTF-8 sequence introduced by lead byte b, or -1.
+int bstr_parse_utf8_code_length(unsigned char b)
+{
+    if (b < 0x80)
+        return 1;
+    return b < 0xC0 ? -1 : b < 0xE0 ? 2 : b < 0xF0 ? 3 : b < 0xF8 ? 4 : -1;
+}
+
+// Decode one UTF-8 sequence from the start of s; returns the code point, or
+// -1 on invalid/truncated input. On success *out_next (if set) = the rest.
+int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
+{
+    if (s.len == 0)
+        return -1;
+    unsigned int codepoint = s.start[0];
+    s.start++; s.len--;
+    if (codepoint >= 128) {
+        int bytes = bstr_parse_utf8_code_length(codepoint);
+        if (bytes < 0 || s.len < bytes - 1)
+            return -1;
+        codepoint &= 127 >> bytes; // keep only the lead byte's payload bits
+        for (int n = 1; n < bytes; n++, s.start++, s.len--) {
+            unsigned int cont = s.start[0];
+            if ((cont & 0xC0) != 0x80) // not a 10xxxxxx continuation byte
+                return -1;
+            codepoint = (codepoint << 6) | (cont & 0x3F);
+        }
+        if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF))
+            return -1;
+        // Reject overlong encodings. The check is taken from libavcodec; it
+        // keeps libavcodec's own subtitle UTF-8 check happy.
+        unsigned int min = bytes == 2 ? 0x80 : 1 << (5 * bytes - 4);
+        if (codepoint < min)
+            return -1;
+    }
+    if (out_next)
+        *out_next = s;
+    return codepoint;
+}
+
+// Return the first UTF-8 sequence of str, or (bstr){0} if it is invalid.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
+{
+    bstr tail;
+    if (bstr_decode_utf8(str, &tail) < 0)
+        return (bstr){0};
+    if (out_next)
+        *out_next = tail;
+    return bstr_splice(str, 0, str.len - tail.len);
+}
+
+// Check whether s is valid UTF-8. Returns 0 if so, -N (N = number of
+// missing bytes) if s merely looks cut off inside its last sequence, and
+// -8 for any other invalid input.
+int bstr_validate_utf8(struct bstr s)
+{
+    while (s.len) {
+        if (bstr_decode_utf8(s, &s) >= 0)
+            continue;
+        // Try to guess whether the sequence was just cut-off.
+        int bytes = bstr_parse_utf8_code_length(s.start[0]);
+        if (bytes > 1 && s.len < 6) {
+            // Manually check validity of left bytes
+            for (int n = 1; n < bytes; n++) {
+                // Everything valid until now - just cut off.
+                if (n >= s.len)
+                    return -(bytes - (int)s.len); // signed, not size_t, math
+                if ((s.start[n] & 0xC0) != 0x80)
+                    break;
+            }
+        }
+        return -8;
+    }
+    return 0;
+}
+
+// Return s with every byte that is not part of valid UTF-8 replaced by the
+// result of mp_append_utf8_bstr() for that byte value; s itself if all valid.
+struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s)
+{
+    bstr new = {0};
+    bstr left = s;
+    unsigned char *first_ok = s.start; // start of the valid run not yet copied
+    while (left.len) {
+        if (bstr_decode_utf8(left, &left) >= 0)
+            continue;
+        bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok});
+        mp_append_utf8_bstr(talloc_ctx, &new, (unsigned char)left.start[0]);
+        left = bstr_cut(left, 1);
+        first_ok = left.start;
+    }
+    if (!new.start)
+        return s; // nothing was replaced
+    if (first_ok != left.start)
+        bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok});
+    return new;
+}
+
+// Ensure the allocation s->start has room for append_min bytes after s->len.
+static void resize_append(void *talloc_ctx, bstr *s, size_t append_min)
+{
+    size_t size = talloc_get_size(s->start);
+    assert(s->len <= size);
+    if (append_min <= size - s->len)
+        return; // enough room already
+    size_t grow = FFMAX(append_min, size); // preallocate in power of 2s
+    if (size >= SIZE_MAX / 2 || grow >= SIZE_MAX / 2)
+        abort(); // oom
+    s->start = talloc_realloc_size(talloc_ctx, s->start, size + grow);
+}
+
+// Append the string, so that *s = *s + append. s->start is expected to be
+// a talloc allocation (which can be realloced) or NULL, in which case
+// talloc_ctx is used as parent context for the new allocation.
+// A \0 is always implicitly appended after the new string for convenience.
+void bstr_xappend(void *talloc_ctx, bstr *s, bstr append)
+{
+    resize_append(talloc_ctx, s, append.len + 1);
+    if (append.len) // an empty append may have start == NULL
+        memcpy(s->start + s->len, append.start, append.len);
+    s->len += append.len;
+    s->start[s->len] = '\0';
+}
+
+void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...)
+{
+    va_list args; // varargs front-end for bstr_xappend_vasprintf()
+    va_start(args, fmt);
+    bstr_xappend_vasprintf(talloc_ctx, s, fmt, args);
+    va_end(args);
+}
+
+// Exactly as bstr_xappend(), but with a formatted string.
+void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt,
+                            va_list ap)
+{
+    // First pass: measure the formatted length on a copy of the arguments.
+    va_list probe;
+    va_copy(probe, ap);
+    char dummy;
+    int need = vsnprintf(&dummy, 1, fmt, probe);
+    va_end(probe);
+    if (need < 0)
+        abort();
+
+    // Second pass: format into the (possibly grown) buffer, \0 included.
+    resize_append(talloc_ctx, s, need + 1);
+    vsnprintf((char *)s->start + s->len, need + 1, fmt, ap);
+    s->len += need;
+}
+
+bool bstr_case_startswith(struct bstr s, struct bstr prefix)
+{
+    struct bstr head = bstr_splice(s, 0, prefix.len); // shorter if s is short
+    return head.len == prefix.len && !bstrcasecmp(head, prefix);
+}
+
+bool bstr_case_endswith(struct bstr s, struct bstr suffix)
+{
+    return s.len >= suffix.len && // an empty suffix matches any string
+           bstrcasecmp(bstr_cut(s, s.len - suffix.len), suffix) == 0;
+}
+
+struct bstr bstr_strip_ext(struct bstr str)
+{
+    int dot = bstrrchr(str, '.'); // last '.' in str, wherever it is
+    if (dot >= 0)
+        str.len = dot; // drop the '.' and everything after it
+    return str;
+}
+
+struct bstr bstr_get_ext(struct bstr s)
+{
+    int dot = bstrrchr(s, '.');
+    if (dot < 0)
+        return (struct bstr){NULL, 0}; // no '.' at all: NULL string
+    return bstr_cut(s, dot + 1); // text after the last '.', may be empty
+}
diff --git a/misc/bstr.h b/misc/bstr.h
new file mode 100644
index 0000000000..a1e99dd4a5
--- /dev/null
+++ b/misc/bstr.h
@@ -0,0 +1,214 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_BSTR_H
+#define MPLAYER_BSTR_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#include "talloc.h"
+#include "osdep/compiler.h"
+
+/* NOTE: 'len' is size_t, but most string-handling functions below assume
+ * that input size has been sanity checked and len fits in an int.
+ */
+typedef struct bstr {
+    unsigned char *start; // first byte; may be NULL (see bstr0())
+    size_t len;           // number of bytes at start
+} bstr;
+
+// Return a talloc'ed C-string copy of str. If str.start is NULL, return NULL.
+static inline char *bstrdup0(void *talloc_ctx, struct bstr str)
+{
+    return talloc_strndup(talloc_ctx, (char *)str.start, str.len);
+}
+
+// Like bstrdup0(), but always return a valid C-string ("" if start is NULL).
+static inline char *bstrto0(void *talloc_ctx, struct bstr str)
+{
+    return str.start ? bstrdup0(talloc_ctx, str) : talloc_strdup(talloc_ctx, "");
+}
+
+// Duplicate str via talloc_memdup() (no \0 is added); NULL start stays NULL.
+static inline struct bstr bstrdup(void *talloc_ctx, struct bstr str)
+{
+    unsigned char *copy = NULL;
+    if (str.start)
+        copy = (unsigned char *)talloc_memdup(talloc_ctx, str.start, str.len);
+    return (struct bstr){copy, str.len};
+}
+
+static inline struct bstr bstr0(const char *s)
+{
+    return (struct bstr){(unsigned char *)s, s ? strlen(s) : 0}; // no copy
+}
+
+int bstrcmp(struct bstr str1, struct bstr str2);
+int bstrcasecmp(struct bstr str1, struct bstr str2);
+int bstrchr(struct bstr str, int c);
+int bstrrchr(struct bstr str, int c);
+int bstrspn(struct bstr str, const char *accept);
+int bstrcspn(struct bstr str, const char *reject);
+
+int bstr_find(struct bstr haystack, struct bstr needle);
+struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str);
+struct bstr bstr_lstrip(struct bstr str);
+struct bstr bstr_strip(struct bstr str);
+struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest);
+bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right);
+struct bstr bstr_splice(struct bstr str, int start, int end);
+long long bstrtoll(struct bstr str, struct bstr *rest, int base);
+double bstrtod(struct bstr str, struct bstr *rest);
+void bstr_lower(struct bstr str);
+int bstr_sscanf(struct bstr str, const char *format, ...);
+
+// Decode the UTF-8 code point at the start of the string, and return the
+// character.
+// After calling this function, *out_next will point to the next character.
+// out_next can be NULL.
+// On error, -1 is returned, and *out_next is not modified.
+int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
+
+// Return the UTF-8 code point at the start of the string.
+// After calling this function, *out_next will point to the next character.
+// out_next can be NULL.
+// On error, an empty string is returned, and *out_next is not modified.
+struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);
+
+// Return the length of the UTF-8 sequence that starts with the given byte.
+// Given a string char *s, the next UTF-8 code point is to be expected at
+//      s + bstr_parse_utf8_code_length(s[0])
+// On error, -1 is returned. On success, it returns a value in the range [1, 4].
+int bstr_parse_utf8_code_length(unsigned char b);
+
+// Return >= 0 if the string is valid UTF-8, otherwise negative error code.
+// Embedded \0 bytes are considered valid.
+// This returns -N if the UTF-8 string was likely just cut-off in the middle of
+// an UTF-8 sequence: -1 means 1 byte was missing, -3 means 3 bytes missing.
+// If the string was likely not cut off, -8 is returned.
+// Use (return_value > -8) to check whether the string is valid UTF-8 or valid
+// but cut-off UTF-8.
+int bstr_validate_utf8(struct bstr s);
+
+// Force the input string to valid UTF-8. If invalid UTF-8 encoding is
+// encountered, the invalid bytes are interpreted as Latin-1.
+// Embedded \0 bytes are considered valid.
+// If replacement happens, a newly allocated string is returned (with a \0
+// byte added past its end for convenience). The string is allocated via
+// talloc, with talloc_ctx as parent.
+struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s);
+
+// Return the text up to and including the next line break. Change *rest to
+// point to the text following this line break. (rest can be NULL.)
+// Line break characters are not stripped.
+struct bstr bstr_getline(struct bstr str, struct bstr *rest);
+
+// Strip one trailing line break. This is intended for use with bstr_getline,
+// and will remove the trailing \n or \r\n sequence.
+struct bstr bstr_strip_linebreaks(struct bstr str);
+
+void bstr_xappend(void *talloc_ctx, bstr *s, bstr append);
+void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...)
+    PRINTF_ATTRIBUTE(3, 4);
+void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list va)
+    PRINTF_ATTRIBUTE(3, 0);
+
+// If s starts with prefix, return true and return the rest of the string in s.
+bool bstr_eatstart(struct bstr *s, struct bstr prefix);
+
+bool bstr_case_startswith(struct bstr s, struct bstr prefix);
+bool bstr_case_endswith(struct bstr s, struct bstr suffix);
+struct bstr bstr_strip_ext(struct bstr str);
+struct bstr bstr_get_ext(struct bstr s);
+
+// Drop the first n bytes of str (n < 0: keep the last -n bytes); n is clamped.
+static inline struct bstr bstr_cut(struct bstr str, int n)
+{
+    if (n < 0) {
+        n += str.len;
+        n = n < 0 ? 0 : n;
+    }
+    if ((size_t)n > str.len)
+        n = str.len;
+    return (struct bstr){str.start + n, str.len - n};
+}
+
+static inline bool bstr_startswith(struct bstr str, struct bstr prefix)
+{
+    // memcmp() only runs once str is known to hold at least prefix.len bytes.
+    return str.len >= prefix.len &&
+           !memcmp(str.start, prefix.start, prefix.len);
+}
+
+static inline bool bstr_startswith0(struct bstr str, const char *prefix)
+{
+    return bstr_startswith(str, bstr0(prefix)); // prefix given as C string
+}
+
+static inline bool bstr_endswith(struct bstr str, struct bstr suffix)
+{
+    // Compare the last suffix.len bytes of str, if it has that many.
+    return str.len >= suffix.len &&
+           !memcmp(str.start + str.len - suffix.len, suffix.start, suffix.len);
+}
+
+static inline bool bstr_endswith0(struct bstr str, const char *suffix)
+{
+    return bstr_endswith(str, bstr0(suffix)); // suffix given as C string
+}
+
+static inline int bstrcmp0(struct bstr str1, const char *str2)
+{
+    return bstrcmp(str1, bstr0(str2)); // str2 is wrapped by bstr0(), not copied
+}
+
+static inline bool bstr_equals(struct bstr str1, struct bstr str2)
+{
+    return bstrcmp(str1, str2) == 0; // equal length and equal bytes
+}
+
+static inline bool bstr_equals0(struct bstr str1, const char *str2)
+{
+    return bstr_equals(str1, bstr0(str2)); // str2 given as C string
+}
+
+static inline int bstrcasecmp0(struct bstr str1, const char *str2)
+{
+    return bstrcasecmp(str1, bstr0(str2)); // str2 given as C string
+}
+
+static inline int bstr_find0(struct bstr haystack, const char *needle)
+{
+    return bstr_find(haystack, bstr0(needle)); // needle given as C string
+}
+
+static inline bool bstr_eatstart0(struct bstr *s, const char *prefix)
+{
+    return bstr_eatstart(s, bstr0(prefix)); // bool, like bstr_eatstart()
+}
+
+// create a pair (not single value!) for "%.*s" printf syntax
+#define BSTR_P(bstr) (int)((bstr).len), (bstr).start
+
+#define WHITESPACE " \f\n\r\t\v"
+
+#endif /* MPLAYER_BSTR_H */
diff --git a/misc/charset_conv.h b/misc/charset_conv.h
index e9efa48444..93bd91cffe 100644
--- a/misc/charset_conv.h
+++ b/misc/charset_conv.h
@@ -2,7 +2,7 @@
 #define MP_CHARSET_CONV_H
 
 #include <stdbool.h>
-#include "bstr/bstr.h"
+#include "misc/bstr.h"
 
 struct mp_log;
 
diff --git a/misc/ring.c b/misc/ring.c
index 804e6330b0..41c9c6a99e 100644
--- a/misc/ring.c
+++ b/misc/ring.c
@@ -21,7 +21,7 @@
 #include <libavutil/common.h>
 #include <assert.h>
 #include "talloc.h"
-#include "compat/atomics.h"
+#include "osdep/atomics.h"
 #include "ring.h"
 
 struct mp_ring {
-- 
cgit v1.2.3