summary | refs | log | tree | commit | diff | stats
path: root/stream
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2014-01-19 20:21:11 +0100
committer: wm4 <wm4@nowhere> 2014-01-19 21:15:54 +0100
commit: 333a9491b7f01a1d61414f0e6ffe9d81a4acb300 (patch)
tree: b820b433a21391f550857d83594e16d337cbaf5f /stream
parent: 6afebbd0d9975c7aed9351a2a10740abeb111817 (diff)
download: mpv-333a9491b7f01a1d61414f0e6ffe9d81a4acb300.tar.bz2
mpv-333a9491b7f01a1d61414f0e6ffe9d81a4acb300.tar.xz
stream: redo stream_read_line()
This simplifies the implementation and should make it more robust. For example, we now return an error if a line is longer than the provided buffer (instead of splitting the line). The code is much shorter, because finding the newline and reading the characters are now done in one go.
Diffstat (limited to 'stream')
-rw-r--r-- stream/stream.c 168
1 files changed, 54 insertions, 114 deletions
diff --git a/stream/stream.c b/stream/stream.c
index b48de575ea..fc786ec305 100644
--- a/stream/stream.c
+++ b/stream/stream.c
@@ -30,8 +30,8 @@
#include <strings.h>
#include <assert.h>
-#include <libavutil/intreadwrite.h>
#include <libavutil/common.h>
+#include "compat/mpbswap.h"
#include "talloc.h"
@@ -841,134 +841,74 @@ static int stream_enable_cache(stream_t **stream, int64_t size, int64_t min,
return res;
}
-/**
- * Helper function to read 16 bits little-endian and advance pointer
- */
-static uint16_t get_le16_inc(const uint8_t **buf)
-{
- uint16_t v = AV_RL16(*buf);
- *buf += 2;
- return v;
-}
-
-/**
- * Helper function to read 16 bits big-endian and advance pointer
- */
-static uint16_t get_be16_inc(const uint8_t **buf)
+static uint16_t stream_read_word_endian(stream_t *s, bool big_endian)
{
- uint16_t v = AV_RB16(*buf);
- *buf += 2;
- return v;
+ unsigned int y = stream_read_char(s);
+ y = (y << 8) | stream_read_char(s);
+ if (big_endian)
+ y = bswap_16(y);
+ return y;
}
-/**
- * Find a newline character in buffer
- * \param buf buffer to search
- * \param len amount of bytes to search in buffer, may not overread
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- * 0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static const uint8_t *find_newline(const uint8_t *buf, int len, int utf16)
+// Read characters until the next '\n' (including), or until the buffer in s is
+// exhausted.
+static int read_characters(stream_t *s, uint8_t *dst, int dstsize, int utf16)
{
- uint32_t c;
- const uint8_t *end = buf + len;
- switch (utf16) {
- case 0:
- return (uint8_t *)memchr(buf, '\n', len);
- case 1:
- while (buf < end - 1) {
- GET_UTF16(c, buf < end - 1 ? get_le16_inc(&buf) : 0, return NULL;)
- if (buf <= end && c == '\n')
- return buf - 1;
- }
- break;
- case 2:
- while (buf < end - 1) {
- GET_UTF16(c, buf < end - 1 ? get_be16_inc(&buf) : 0, return NULL;)
- if (buf <= end && c == '\n')
- return buf - 1;
- }
- break;
- }
- return NULL;
-}
-
-#define EMPTY_STMT do{}while(0);
-
-/**
- * Copy a number of bytes, converting to UTF-8 if input is UTF-16
- * \param dst buffer to copy to
- * \param dstsize size of dst buffer
- * \param src buffer to copy from
- * \param len amount of bytes to copy from src
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- * 0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static int copy_characters(uint8_t *dst, int dstsize,
- const uint8_t *src, int *len, int utf16)
-{
- uint32_t c;
- uint8_t *dst_end = dst + dstsize;
- const uint8_t *end = src + *len;
- switch (utf16) {
- case 0:
- if (*len > dstsize)
- *len = dstsize;
- memcpy(dst, src, *len);
- return *len;
- case 1:
- while (src < end - 1 && dst_end - dst > 8) {
- uint8_t tmp;
- GET_UTF16(c, src < end - 1 ? get_le16_inc(&src) : 0, EMPTY_STMT)
- PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
- }
- *len -= end - src;
- return dstsize - (dst_end - dst);
- case 2:
- while (src < end - 1 && dst_end - dst > 8) {
+ if (utf16 == 1 || utf16 == 2) {
+ uint8_t *cur = dst;
+ while (1) {
+ if ((cur - dst) + 8 >= dstsize) // PUT_UTF8 writes max. 8 bytes
+ return -1; // line too long
+ uint32_t c;
uint8_t tmp;
- GET_UTF16(c, src < end - 1 ? get_be16_inc(&src) : 0, EMPTY_STMT)
- PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
+ GET_UTF16(c, stream_read_word_endian(s, utf16 == 2), return -1;)
+ if (s->eof)
+ break; // legitimate EOF; ignore the case of partial reads
+ PUT_UTF8(c, tmp, *cur++ = tmp;)
+ if (c == '\n')
+ break;
}
- *len -= end - src;
- return dstsize - (dst_end - dst);
+ return cur - dst;
+ } else {
+ if (s->buf_pos >= s->buf_len)
+ stream_fill_buffer(s);
+ uint8_t *src = s->buffer + s->buf_pos;
+ int src_len = s->buf_len - s->buf_pos;
+ uint8_t *end = memchr(src, '\n', src_len);
+ int len = end ? end - src + 1 : src_len;
+ if (len > dstsize)
+ return -1; // line too long
+ memcpy(dst, src, len);
+ s->buf_pos += len;
+ return len;
}
- return 0;
}
+// On error, or if the line is larger than max-1, return NULL and unset s->eof.
+// On EOF, return NULL, and s->eof will be set.
+// Otherwise, return the line (including \n or \r\n at the end of the line).
+// If the return value is non-NULL, it's always the same as mem.
+// utf16: 0: UTF8 or 8 bit legacy, 1: UTF16-LE, 2: UTF16-BE
unsigned char *stream_read_line(stream_t *s, unsigned char *mem, int max,
int utf16)
{
- int len;
- const unsigned char *end;
- unsigned char *ptr = mem;
- if (utf16 == -1)
- utf16 = 0;
if (max < 1)
return NULL;
- max--; // reserve one for 0-termination
- do {
- len = s->buf_len - s->buf_pos;
- // try to fill the buffer
- if (len <= 0 &&
- (!stream_fill_buffer(s) ||
- (len = s->buf_len - s->buf_pos) <= 0))
- break;
- end = find_newline(s->buffer + s->buf_pos, len, utf16);
- if (end)
- len = end - (s->buffer + s->buf_pos) + 1;
- if (len > 0 && max > 0) {
- int l = copy_characters(ptr, max, s->buffer + s->buf_pos, &len,
- utf16);
- max -= l;
- ptr += l;
- if (!len)
- break;
+ int read = 0;
+ while (1) {
+ // Reserve 1 byte of ptr for terminating \0.
+ int l = read_characters(s, &mem[read], max - read - 1, utf16);
+ if (l < 0) {
+ MP_VERBOSE(s, "error reading line\n");
+ s->eof = false;
+ return NULL;
}
- s->buf_pos += len;
- } while (!end);
- ptr[0] = 0;
- if (s->eof && ptr == mem)
+ read += l;
+ if (l == 0 || (read > 0 && mem[read - 1] == '\n'))
+ break;
+ }
+ mem[read] = '\0';
+ if (s->eof && read == 0) // legitimate EOF
return NULL;
return mem;
}