From 33e999de82dffcdd1404468a8e93eef8b7cc99a3 Mon Sep 17 00:00:00 2001 From: wm4 Date: Thu, 9 Jan 2020 02:25:13 +0100 Subject: stream_libarchive: fix unnecessarily opening all volumes on opening Seems like I'm still not done with rar playback stuff... It turns out the reason for archive_read_open1() opening all volumes had nothing to do with libarchive's rar code, but was a consequence of how multi-volume support is implemented in libarchive, and due to the fact that we enabled archive_read_support_format_zip_seekable() (through archive_read_support_format_zip()). The seekable zip format will seek to the end of the file and search for a zip "header" there. It could possibly be considered a libarchive bug that it does that even if it's fairly sure that it's a RAR file. We already do probing on a small buffer read from the start of the file (i.e. not giving libarchive a way to seek the stream before we think it's an archive), but that does not help, since libarchive needs to probe _again_. libarchive does not seem to provide a function to query the format (there is no archive_read_get_format()), which seems quite strange; at least I didn't find one. This commit works around this by doing some manual rar/zip probing. We could have gone only with rar probing. But detecting zip separately lets us avoid stream_libarchive seeking to the end during early probing. This is an additional bonus on top of "fixing" multi-volume rar. The zip probing is from archive_read_format_zip_streamable_bid(). The rar signature is the common prefix of the rar and rar5 formats in libarchive (presumably the RAR fixed header parts without version). If the demuxer seeks to the end of the rar entry, this will still open all volumes; I'm not sure whether the old/removed rar code in mpv could handle this better. 
See: #7182 --- stream/stream_libarchive.c | 63 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/stream/stream_libarchive.c b/stream/stream_libarchive.c index 9d09e1991b..c11ec706fb 100644 --- a/stream/stream_libarchive.c +++ b/stream/stream_libarchive.c @@ -33,6 +33,33 @@ struct mp_archive_volume { char *url; }; +static bool probe_rar(struct stream *s) +{ + static uint8_t rar_sig[] = {0x52, 0x61, 0x72, 0x21, 0x1a, 0x07}; + uint8_t buf[6]; + if (stream_read_peek(s, buf, sizeof(buf)) != sizeof(buf)) + return false; + return memcmp(buf, rar_sig, 6) == 0; +} + +static bool probe_zip(struct stream *s) +{ + uint8_t p[4]; + if (stream_read_peek(s, p, sizeof(p)) != sizeof(p)) + return false; + // Lifted from libarchive, BSD license. + if (p[0] == 'P' && p[1] == 'K') { + if ((p[2] == '\001' && p[3] == '\002') || + (p[2] == '\003' && p[3] == '\004') || + (p[2] == '\005' && p[3] == '\006') || + (p[2] == '\006' && p[3] == '\006') || + (p[2] == '\007' && p[3] == '\010') || + (p[2] == '0' && p[3] == '0')) + return true; + } + return false; +} + static bool volume_seek(struct mp_archive_volume *vol) { if (!vol->src || vol->seek_to < 0) @@ -267,17 +294,35 @@ struct mp_archive *mp_archive_new(struct mp_log *log, struct stream *src, locale_t oldlocale = uselocale(mpa->locale); - archive_read_support_format_7zip(mpa->arch); - archive_read_support_format_iso9660(mpa->arch); + bool maybe_rar = probe_rar(src); + bool maybe_zip = probe_zip(src); + bool probe_all = flags & MP_ARCHIVE_FLAG_UNSAFE; + archive_read_support_format_rar(mpa->arch); archive_read_support_format_rar5(mpa->arch); - archive_read_support_format_zip(mpa->arch); - archive_read_support_filter_bzip2(mpa->arch); - archive_read_support_filter_gzip(mpa->arch); - archive_read_support_filter_xz(mpa->arch); - if (flags & MP_ARCHIVE_FLAG_UNSAFE) { - archive_read_support_format_gnutar(mpa->arch); - archive_read_support_format_tar(mpa->arch); + + // Exclude other formats 
if it's probably RAR, because other formats may + // behave suboptimal with multiple volumes exposed, such as opening every + // single volume by seeking at the end of the file. + if (!maybe_rar) { + archive_read_support_format_7zip(mpa->arch); + archive_read_support_format_iso9660(mpa->arch); + archive_read_support_filter_bzip2(mpa->arch); + archive_read_support_filter_gzip(mpa->arch); + archive_read_support_filter_xz(mpa->arch); + archive_read_support_format_zip_streamable(mpa->arch); + + if (probe_all) { + archive_read_support_format_gnutar(mpa->arch); + archive_read_support_format_tar(mpa->arch); + } + + // This zip reader is normally preferable. However, it seeks to the end + // of the file, which may be annoying (HTTP reconnect, volume skipping), + // so use it only as last resort, or if it's relatively likely that it's + // really zip. + if (maybe_zip || probe_all) + archive_read_support_format_zip_seekable(mpa->arch); } archive_read_set_read_callback(mpa->arch, read_cb); -- cgit v1.2.3