summary | refs | log | tree | commit | diff | stats
path: root/stream
diff options
context:
space:
mode:
Diffstat (limited to 'stream')
-rw-r--r-- stream/stream.c 168
1 files changed, 54 insertions, 114 deletions
diff --git a/stream/stream.c b/stream/stream.c
index b48de575ea..fc786ec305 100644
--- a/stream/stream.c
+++ b/stream/stream.c
@@ -30,8 +30,8 @@
#include <strings.h>
#include <assert.h>
-#include <libavutil/intreadwrite.h>
#include <libavutil/common.h>
+#include "compat/mpbswap.h"
#include "talloc.h"
@@ -841,134 +841,74 @@ static int stream_enable_cache(stream_t **stream, int64_t size, int64_t min,
return res;
}
-/**
- * Helper function to read 16 bits little-endian and advance pointer
- */
-static uint16_t get_le16_inc(const uint8_t **buf)
-{
- uint16_t v = AV_RL16(*buf);
- *buf += 2;
- return v;
-}
-
-/**
- * Helper function to read 16 bits big-endian and advance pointer
- */
-static uint16_t get_be16_inc(const uint8_t **buf)
+static uint16_t stream_read_word_endian(stream_t *s, bool big_endian)
{
- uint16_t v = AV_RB16(*buf);
- *buf += 2;
- return v;
+ unsigned int y = stream_read_char(s);
+ y = (y << 8) | stream_read_char(s);
+ if (big_endian)
+ y = bswap_16(y);
+ return y;
}
-/**
- * Find a newline character in buffer
- * \param buf buffer to search
- * \param len amount of bytes to search in buffer, may not overread
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- * 0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static const uint8_t *find_newline(const uint8_t *buf, int len, int utf16)
+// Read characters until the next '\n' (including), or until the buffer in s is
+// exhausted.
+static int read_characters(stream_t *s, uint8_t *dst, int dstsize, int utf16)
{
- uint32_t c;
- const uint8_t *end = buf + len;
- switch (utf16) {
- case 0:
- return (uint8_t *)memchr(buf, '\n', len);
- case 1:
- while (buf < end - 1) {
- GET_UTF16(c, buf < end - 1 ? get_le16_inc(&buf) : 0, return NULL;)
- if (buf <= end && c == '\n')
- return buf - 1;
- }
- break;
- case 2:
- while (buf < end - 1) {
- GET_UTF16(c, buf < end - 1 ? get_be16_inc(&buf) : 0, return NULL;)
- if (buf <= end && c == '\n')
- return buf - 1;
- }
- break;
- }
- return NULL;
-}
-
-#define EMPTY_STMT do{}while(0);
-
-/**
- * Copy a number of bytes, converting to UTF-8 if input is UTF-16
- * \param dst buffer to copy to
- * \param dstsize size of dst buffer
- * \param src buffer to copy from
- * \param len amount of bytes to copy from src
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- * 0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static int copy_characters(uint8_t *dst, int dstsize,
- const uint8_t *src, int *len, int utf16)
-{
- uint32_t c;
- uint8_t *dst_end = dst + dstsize;
- const uint8_t *end = src + *len;
- switch (utf16) {
- case 0:
- if (*len > dstsize)
- *len = dstsize;
- memcpy(dst, src, *len);
- return *len;
- case 1:
- while (src < end - 1 && dst_end - dst > 8) {
- uint8_t tmp;
- GET_UTF16(c, src < end - 1 ? get_le16_inc(&src) : 0, EMPTY_STMT)
- PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
- }
- *len -= end - src;
- return dstsize - (dst_end - dst);
- case 2:
- while (src < end - 1 && dst_end - dst > 8) {
+ if (utf16 == 1 || utf16 == 2) {
+ uint8_t *cur = dst;
+ while (1) {
+ if ((cur - dst) + 8 >= dstsize) // PUT_UTF8 writes max. 8 bytes
+ return -1; // line too long
+ uint32_t c;
uint8_t tmp;
- GET_UTF16(c, src < end - 1 ? get_be16_inc(&src) : 0, EMPTY_STMT)
- PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
+ GET_UTF16(c, stream_read_word_endian(s, utf16 == 2), return -1;)
+ if (s->eof)
+ break; // legitimate EOF; ignore the case of partial reads
+ PUT_UTF8(c, tmp, *cur++ = tmp;)
+ if (c == '\n')
+ break;
}
- *len -= end - src;
- return dstsize - (dst_end - dst);
+ return cur - dst;
+ } else {
+ if (s->buf_pos >= s->buf_len)
+ stream_fill_buffer(s);
+ uint8_t *src = s->buffer + s->buf_pos;
+ int src_len = s->buf_len - s->buf_pos;
+ uint8_t *end = memchr(src, '\n', src_len);
+ int len = end ? end - src + 1 : src_len;
+ if (len > dstsize)
+ return -1; // line too long
+ memcpy(dst, src, len);
+ s->buf_pos += len;
+ return len;
}
- return 0;
}
+// On error, or if the line is larger than max-1, return NULL and unset s->eof.
+// On EOF, return NULL, and s->eof will be set.
+// Otherwise, return the line (including \n or \r\n at the end of the line).
+// If the return value is non-NULL, it's always the same as mem.
+// utf16: 0: UTF8 or 8 bit legacy, 1: UTF16-LE, 2: UTF16-BE
unsigned char *stream_read_line(stream_t *s, unsigned char *mem, int max,
int utf16)
{
- int len;
- const unsigned char *end;
- unsigned char *ptr = mem;
- if (utf16 == -1)
- utf16 = 0;
if (max < 1)
return NULL;
- max--; // reserve one for 0-termination
- do {
- len = s->buf_len - s->buf_pos;
- // try to fill the buffer
- if (len <= 0 &&
- (!stream_fill_buffer(s) ||
- (len = s->buf_len - s->buf_pos) <= 0))
- break;
- end = find_newline(s->buffer + s->buf_pos, len, utf16);
- if (end)
- len = end - (s->buffer + s->buf_pos) + 1;
- if (len > 0 && max > 0) {
- int l = copy_characters(ptr, max, s->buffer + s->buf_pos, &len,
- utf16);
- max -= l;
- ptr += l;
- if (!len)
- break;
+ int read = 0;
+ while (1) {
+ // Reserve 1 byte of mem for terminating \0.
+ int l = read_characters(s, &mem[read], max - read - 1, utf16);
+ if (l < 0) {
+ MP_VERBOSE(s, "error reading line\n");
+ s->eof = false;
+ return NULL;
}
- s->buf_pos += len;
- } while (!end);
- ptr[0] = 0;
- if (s->eof && ptr == mem)
+ read += l;
+ if (l == 0 || (read > 0 && mem[read - 1] == '\n'))
+ break;
+ }
+ mem[read] = '\0';
+ if (s->eof && read == 0) // legitimate EOF
return NULL;
return mem;
}