diff options
-rw-r--r-- | DOCS/man/options.rst | 41 | ||||
-rw-r--r-- | options/options.c | 6 | ||||
-rw-r--r-- | options/options.h | 3 | ||||
-rw-r--r-- | sub/filter_regex.c | 105 | ||||
-rw-r--r-- | sub/sd.h | 1 | ||||
-rw-r--r-- | sub/sd_ass.c | 3 | ||||
-rw-r--r-- | wscript_build.py | 1 |
7 files changed, 160 insertions, 0 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 0572b66816..5ffc16cf89 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -2571,6 +2571,47 @@ Subtitles Default: ``no``. +``--sub-filter-regex-...=...`` + Set a list of regular expressions to match on text subtitles, and remove any + lines that match (default: empty). This is a string list option. See + `List Options`_ for details. Normally, you should use + ``--sub-filter-regex-append=<regex>``, where each option use will append a + new regular expression, without having to fight escaping problems. + + List items are matched in order. If a regular expression matches, the + process is stopped, and the subtitle line is discarded. The text matched + against is, currently, always the ``Text`` field of ASS events (if the + subtitle format is different, it is always converted). This may include + formatting tags. Matching is case-insensitive, but how this is done depends + on the libc, and most likely works in ASCII only. It does not work on + bitmap/image subtitles. Unavailable on inferior OSes (requires POSIX regex + support). + + .. admonition:: Example + + ``--sub-filter-regex-append=opensubtitles\.org`` filters some ads. + + Technically, using a list for matching is redundant, since you could just + use a single combined regular expression. But it helps with diagnosis, + ease of use, and temporarily disabling or enabling individual filters. + + .. warning:: + + This is experimental. The semantics most likely will change, and if you + use this, you should be prepared to update the option later. Ideas + include replacing the regexes with a very primitive and small subset of + sed, or some method to control case-sensitivity. + +``--sub-filter-regex-warn=<yes|no>`` + Log dropped lines with warning log level, instead of verbose (default: no). + Helpful for testing. + +``--sub-filter-regex-enable=<yes|no>`` + Whether to enable regex filtering (default: yes). 
Note that if no regexes + are added to the ``--sub-filter-regex`` list, setting this option to ``yes`` + has no effect. It's meant to easily disable or enable filtering + temporarily. + ``--sub-create-cc-track=<yes|no>`` For every video stream, create a closed captions track (default: no). The only purpose is to make the track available for selection at the start of diff --git a/options/options.c b/options/options.c index 73cd7ff76c..3dc85162fd 100644 --- a/options/options.c +++ b/options/options.c @@ -201,9 +201,15 @@ const struct m_sub_options mp_sub_filter_opts = { .opts = (const struct m_option[]){ OPT_FLAG("sub-filter-sdh", sub_filter_SDH, 0), OPT_FLAG("sub-filter-sdh-harder", sub_filter_SDH_harder, 0), + OPT_FLAG("sub-filter-regex-enable", rf_enable, 0), + OPT_STRINGLIST("sub-filter-regex", rf_items, 0), + OPT_FLAG("sub-filter-regex-warn", rf_warn, 0), {0} }, .size = sizeof(OPT_BASE_STRUCT), + .defaults = &(OPT_BASE_STRUCT){ + .rf_enable = 1, + }, .change_flags = UPDATE_SUB_FILT, }; diff --git a/options/options.h b/options/options.h index 8fbec3161c..cfb6f44813 100644 --- a/options/options.h +++ b/options/options.h @@ -103,6 +103,9 @@ struct mp_subtitle_opts { struct mp_sub_filter_opts { int sub_filter_SDH; int sub_filter_SDH_harder; + int rf_enable; + char **rf_items; + int rf_warn; }; struct mp_osd_render_opts { diff --git a/sub/filter_regex.c b/sub/filter_regex.c new file mode 100644 index 0000000000..a5aa03a849 --- /dev/null +++ b/sub/filter_regex.c @@ -0,0 +1,105 @@ +#include <regex.h> +#include <sys/types.h> + +#include "common/common.h" +#include "common/msg.h" +#include "misc/bstr.h" +#include "options/options.h" +#include "sd.h" + +struct priv { + int offset; + regex_t *regexes; + int num_regexes; +}; + +static bool rf_init(struct sd_filter *ft) +{ + if (strcmp(ft->codec, "ass") != 0) + return false; + + if (!ft->opts->rf_enable) + return false; + + struct priv *p = talloc_zero(ft, struct priv); + ft->priv = p; + + for (int n = 0; ft->opts->rf_items 
&& ft->opts->rf_items[n]; n++) { + char *item = ft->opts->rf_items[n]; + + MP_TARRAY_GROW(p, p->regexes, p->num_regexes); + regex_t *preg = &p->regexes[p->num_regexes]; + + int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB); + if (err) { + char errbuf[512]; + regerror(err, preg, errbuf, sizeof(errbuf)); + MP_ERR(ft, "Regular expression error: '%s'\n", errbuf); + continue; + } + + p->num_regexes += 1; + } + + if (!p->num_regexes) + return false; + + char *headers = ft->event_format; + while (headers && headers[0]) { + p->offset += 1; + headers = strchr(headers, ','); + if (headers) + headers += 1; + } + p->offset -= 1; // removes Start/End, adds ReadOrder + + return true; +} + +static void rf_uninit(struct sd_filter *ft) +{ + struct priv *p = ft->priv; + + for (int n = 0; n < p->num_regexes; n++) + regfree(&p->regexes[n]); +} + +static struct demux_packet *rf_filter(struct sd_filter *ft, + struct demux_packet *pkt) +{ + struct priv *p = ft->priv; + char *line = bstrto0(NULL, (bstr){(char *)pkt->buffer, pkt->len}); + bool drop = false; + + char *text = line; + for (int n = 0; n < p->offset - 1; n++) { + text = strchr(text, ','); + if (!text) { + MP_WARN(ft, "Malformed event: '%s'\n", line); + text = line; // shouldn't happen; random fallback + break; + } + text = text + 1; + } + + for (int n = 0; n < p->num_regexes; n++) { + int err = regexec(&p->regexes[n], text, 0, NULL, 0); + if (err == 0) { + int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V; + MP_MSG(ft, level, "Matching regex %d => drop: '%s'\n", n, text); + drop = true; + break; + } else if (err != REG_NOMATCH) { + MP_WARN(ft, "Error on regexec() on regex %d.\n", n); + } + } + + talloc_free(line); + return drop ? 
NULL : pkt; +} + +const struct sd_filter_functions sd_filter_regex = { + .init = rf_init, + .uninit = rf_uninit, + .filter = rf_filter, +}; @@ -87,5 +87,6 @@ struct sd_filter_functions { }; extern const struct sd_filter_functions sd_filter_sdh; +extern const struct sd_filter_functions sd_filter_regex; #endif diff --git a/sub/sd_ass.c b/sub/sd_ass.c index 2b0cf13127..40e5093d8e 100644 --- a/sub/sd_ass.c +++ b/sub/sd_ass.c @@ -65,6 +65,9 @@ static void fill_plaintext(struct sd *sd, double pts); static const struct sd_filter_functions *const filters[] = { // Note: list order defines filter order. &sd_filter_sdh, +#if HAVE_POSIX + &sd_filter_regex, +#endif NULL, }; diff --git a/wscript_build.py b/wscript_build.py index 488a81f57f..a95effca76 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -388,6 +388,7 @@ def build(ctx): ( "sub/ass_mp.c", "libass"), ( "sub/dec_sub.c" ), ( "sub/draw_bmp.c" ), + ( "sub/filter_regex.c", "posix" ), ( "sub/filter_sdh.c" ), ( "sub/img_convert.c" ), ( "sub/lavc_conv.c" ), |