diff options
author | wm4 <wm4@nowhere> | 2020-02-16 02:03:36 +0100 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2020-02-16 02:07:24 +0100 |
commit | a4eb8f75c0644ce5cf7c0bcea6b3efd2a886027d (patch) | |
tree | fa028721887a4bc984a20c946c4ee95295677b7a /sub | |
parent | 0b35b4c91796fb020e13d955efd450021eb5eedb (diff) | |
download | mpv-a4eb8f75c0644ce5cf7c0bcea6b3efd2a886027d.tar.bz2 mpv-a4eb8f75c0644ce5cf7c0bcea6b3efd2a886027d.tar.xz |
sub: add an option to filter subtitles by regex
Works as an ad filter. I had some more plans, for example replacing
matching text with different text, but for now it's dropping matches
only. There's a big warning in the manpage that I might change
semantics. For example, I might turn it into a primitive sed.
In a sane world, you'd probably write a simple script that processes
downloaded subtitles before giving them to mpv, and avoid all this
complexity. But we don't live in a sane world, and the sooner you learn
this, the happier you will be. (But I also want to run this on muxed
subtitles.)
This is pretty straightforward. We use POSIX regexes, which are readily
available without additional pain or dependencies. This also means it's
(apparently) not available on win32 (MinGW). The regex list is because I
hate big monolithic regexes, and this makes it slightly better.
Very superficially tested.
Diffstat (limited to 'sub')
-rw-r--r-- | sub/filter_regex.c | 105 | ||||
-rw-r--r-- | sub/sd.h | 1 | ||||
-rw-r--r-- | sub/sd_ass.c | 3 |
3 files changed, 109 insertions, 0 deletions
diff --git a/sub/filter_regex.c b/sub/filter_regex.c
new file mode 100644
index 0000000000..a5aa03a849
--- /dev/null
+++ b/sub/filter_regex.c
@@ -0,0 +1,105 @@
+#include <regex.h> // POSIX regcomp()/regexec()/regerror()/regfree()
+#include <sys/types.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+#include "options/options.h"
+#include "sd.h"
+
+struct priv { // per-filter state, allocated in rf_init()
+    int offset; // number of comma-separated entries in ft->event_format, minus one
+    regex_t *regexes; // patterns from opts->rf_items that compiled successfully
+    int num_regexes; // number of valid entries in regexes
+};
+
+static bool rf_init(struct sd_filter *ft) // returns false when the filter should not be used
+{
+    if (strcmp(ft->codec, "ass") != 0) // only the "ass" codec is handled
+        return false;
+
+    if (!ft->opts->rf_enable) // filter is switched off in the options
+        return false;
+
+    struct priv *p = talloc_zero(ft, struct priv);
+    ft->priv = p;
+
+    for (int n = 0; ft->opts->rf_items && ft->opts->rf_items[n]; n++) { // rf_items may be NULL; otherwise walked up to its NULL entry
+        char *item = ft->opts->rf_items[n];
+
+        MP_TARRAY_GROW(p, p->regexes, p->num_regexes); // presumably makes slot [num_regexes] usable -- confirm against the macro
+        regex_t *preg = &p->regexes[p->num_regexes];
+
+        int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB); // case-insensitive ERE, no sub-match reporting
+        if (err) {
+            char errbuf[512];
+            regerror(err, preg, errbuf, sizeof(errbuf));
+            MP_ERR(ft, "Regular expression error: '%s'\n", errbuf);
+            continue; // slot is not counted, so the next item reuses it
+        }
+
+        p->num_regexes += 1;
+    }
+
+    if (!p->num_regexes) // nothing compiled: report the filter as unusable
+        return false;
+
+    char *headers = ft->event_format;
+    while (headers && headers[0]) { // count the comma-separated entries of event_format
+        p->offset += 1;
+        headers = strchr(headers, ',');
+        if (headers)
+            headers += 1; // continue after the comma
+    }
+    p->offset -= 1; // removes Start/End, adds ReadOrder
+
+    return true;
+}
+
+static void rf_uninit(struct sd_filter *ft) // frees every pattern compiled by rf_init()
+{
+    struct priv *p = ft->priv; // dereferenced unconditionally; assumes rf_init() succeeded -- TODO confirm with caller
+
+    for (int n = 0; n < p->num_regexes; n++)
+        regfree(&p->regexes[n]);
+}
+
+static struct demux_packet *rf_filter(struct sd_filter *ft, // returns pkt to keep the event, NULL to drop it
+                                      struct demux_packet *pkt)
+{
+    struct priv *p = ft->priv;
+    char *line = bstrto0(NULL, (bstr){(char *)pkt->buffer, pkt->len}); // presumably a NUL-terminated copy of the payload; freed below
+    bool drop = false;
+
+    char *text = line;
+    for (int n = 0; n < p->offset - 1; n++) { // step past offset-1 commas; the remainder is what gets matched
+        text = strchr(text, ',');
+        if (!text) {
+            MP_WARN(ft, "Malformed event: '%s'\n", line);
+            text = line; // shouldn't happen; random fallback
+            break;
+        }
+        text = text + 1; // move past the comma
+    }
+
+    for (int n = 0; n < p->num_regexes; n++) { // the first matching pattern decides
+        int err = regexec(&p->regexes[n], text, 0, NULL, 0);
+        if (err == 0) {
+            int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V; // rf_warn raises the level of the drop message
+            MP_MSG(ft, level, "Matching regex %d => drop: '%s'\n", n, text);
+            drop = true;
+            break;
+        } else if (err != REG_NOMATCH) { // any other result is logged and the next pattern is tried
+            MP_WARN(ft, "Error on regexec() on regex %d.\n", n);
+        }
+    }
+
+    talloc_free(line);
+    return drop ? NULL : pkt;
+}
+
+const struct sd_filter_functions sd_filter_regex = { // callback table exported for this filter
+    .init = rf_init,
+    .uninit = rf_uninit,
+    .filter = rf_filter,
+};
@@ -87,5 +87,6 @@ struct sd_filter_functions {
 };
 
 extern const struct sd_filter_functions sd_filter_sdh;
+extern const struct sd_filter_functions sd_filter_regex; // defined in sub/filter_regex.c
 
 #endif
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index 2b0cf13127..40e5093d8e 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -65,6 +65,9 @@ static void fill_plaintext(struct sd *sd, double pts);
 static const struct sd_filter_functions *const filters[] = {
     // Note: list order defines filter order.
     &sd_filter_sdh,
+#if HAVE_POSIX // the regex filter is only listed when HAVE_POSIX is set
+    &sd_filter_regex,
+#endif
     NULL,
 };
 
|