summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--DOCS/man/options.rst41
-rw-r--r--options/options.c6
-rw-r--r--options/options.h3
-rw-r--r--sub/filter_regex.c105
-rw-r--r--sub/sd.h1
-rw-r--r--sub/sd_ass.c3
-rw-r--r--wscript_build.py1
7 files changed, 160 insertions, 0 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 0572b66816..5ffc16cf89 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -2571,6 +2571,47 @@ Subtitles
Default: ``no``.
+``--sub-filter-regex-...=...``
+ Set a list of regular expressions to match on text subtitles, and remove any
+ lines that match (default: empty). This is a string list option. See
+ `List Options`_ for details. Normally, you should use
+ ``--sub-filter-regex-append=<regex>``, where each option use will append a
+ new regular expression, without having to fight escaping problems.
+
+ List items are matched in order. If a regular expression matches, the
+ process is stopped, and the subtitle line is discarded. The text matched
+ against is, currently, always the ``Text`` field of ASS events (if the
+ subtitle format is different, it is always converted). This may include
+ formatting tags. Matching is case-insensitive, but how this is done depends
+ on the libc, and most likely works in ASCII only. It does not work on
+ bitmap/image subtitles. Unavailable on inferior OSes (requires POSIX regex
+ support).
+
+ .. admonition:: Example
+
+ ``--sub-filter-regex-append=opensubtitles\.org`` filters some ads.
+
+ Technically, using a list for matching is redundant, since you could just
+ use a single combined regular expression. But it helps with diagnosis,
+ ease of use, and temporarily disabling or enabling individual filters.
+
+ .. warning::
+
+ This is experimental. The semantics most likely will change, and if you
+ use this, you should be prepared to update the option later. Ideas
+ include replacing the regexes with a very primitive and small subset of
+ sed, or some method to control case-sensitivity.
+
+``--sub-filter-regex-warn=<yes|no>``
+ Log dropped lines with warning log level, instead of verbose (default: no).
+ Helpful for testing.
+
+``--sub-filter-regex-enable=<yes|no>``
+ Whether to enable regex filtering (default: yes). Note that if no regexes
+ are added to the ``--sub-filter-regex`` list, setting this option to ``yes``
+ has no effect. It's meant to easily disable or enable filtering
+ temporarily.
+
``--sub-create-cc-track=<yes|no>``
For every video stream, create a closed captions track (default: no). The
only purpose is to make the track available for selection at the start of
diff --git a/options/options.c b/options/options.c
index 73cd7ff76c..3dc85162fd 100644
--- a/options/options.c
+++ b/options/options.c
@@ -201,9 +201,15 @@ const struct m_sub_options mp_sub_filter_opts = {
.opts = (const struct m_option[]){
OPT_FLAG("sub-filter-sdh", sub_filter_SDH, 0),
OPT_FLAG("sub-filter-sdh-harder", sub_filter_SDH_harder, 0),
+ OPT_FLAG("sub-filter-regex-enable", rf_enable, 0),
+ OPT_STRINGLIST("sub-filter-regex", rf_items, 0),
+ OPT_FLAG("sub-filter-regex-warn", rf_warn, 0),
{0}
},
.size = sizeof(OPT_BASE_STRUCT),
+ .defaults = &(OPT_BASE_STRUCT){
+ .rf_enable = 1,
+ },
.change_flags = UPDATE_SUB_FILT,
};
diff --git a/options/options.h b/options/options.h
index 8fbec3161c..cfb6f44813 100644
--- a/options/options.h
+++ b/options/options.h
@@ -103,6 +103,9 @@ struct mp_subtitle_opts {
struct mp_sub_filter_opts {
int sub_filter_SDH;
int sub_filter_SDH_harder;
+ int rf_enable; // --sub-filter-regex-enable; options.c sets the default to 1
+ char **rf_items; // --sub-filter-regex string list; may be NULL, otherwise read up to a NULL entry
+ int rf_warn; // --sub-filter-regex-warn; log dropped lines as warnings instead of verbose
};
struct mp_osd_render_opts {
diff --git a/sub/filter_regex.c b/sub/filter_regex.c
new file mode 100644
index 0000000000..a5aa03a849
--- /dev/null
+++ b/sub/filter_regex.c
@@ -0,0 +1,105 @@
+#include <regex.h>
+#include <sys/types.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+#include "options/options.h"
+#include "sd.h"
+/*
+ * Private state of the regex subtitle filter. Allocated in rf_init() and
+ * stored in sd_filter.priv.
+ */
+struct priv {
+ int offset; // field count of event_format minus 1; rf_filter() skips offset-1 commas to reach the text
+ regex_t *regexes; // patterns that compiled successfully, in option-list order
+ int num_regexes; // number of valid entries in regexes
+};
+/*
+ * Set up the regex filter: compile every pattern from the rf_items option
+ * and work out how many fields precede the text in a packet line.
+ *
+ * Returns false if the codec is not "ass", if rf_enable is unset, or if no
+ * pattern compiled successfully; returns true otherwise. Patterns that fail
+ * to compile are reported via MP_ERR and skipped. Once the codec and enable
+ * checks have passed, ft->priv points to the struct priv allocated here.
+ */
+static bool rf_init(struct sd_filter *ft)
+{
+ if (strcmp(ft->codec, "ass") != 0)
+ return false;
+
+ if (!ft->opts->rf_enable)
+ return false;
+
+ struct priv *p = talloc_zero(ft, struct priv);
+ ft->priv = p;
+
+ for (int n = 0; ft->opts->rf_items && ft->opts->rf_items[n]; n++) {
+ char *item = ft->opts->rf_items[n];
+
+ MP_TARRAY_GROW(p, p->regexes, p->num_regexes); // make room for slot [num_regexes]; the count is bumped only on success
+ regex_t *preg = &p->regexes[p->num_regexes];
+
+ int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB); // case-insensitive, extended syntax, no match offsets
+ if (err) {
+ char errbuf[512];
+ regerror(err, preg, errbuf, sizeof(errbuf));
+ MP_ERR(ft, "Regular expression error: '%s'\n", errbuf);
+ continue; // num_regexes is unchanged, so the next item reuses this slot
+ }
+
+ p->num_regexes += 1;
+ }
+
+ if (!p->num_regexes)
+ return false;
+
+ char *headers = ft->event_format; // count its comma-separated entries into p->offset
+ while (headers && headers[0]) {
+ p->offset += 1;
+ headers = strchr(headers, ',');
+ if (headers)
+ headers += 1; // continue after the comma
+ }
+ p->offset -= 1; // packet lines have one field less: removes Start/End, adds ReadOrder
+
+ return true;
+}
+/*
+ * Release every regex that rf_init() compiled. The regexes array and the
+ * priv struct themselves are not freed here (presumably left to talloc via
+ * their parent; confirm against the caller).
+ */
+static void rf_uninit(struct sd_filter *ft)
+{
+ struct priv *p = ft->priv;
+
+ for (int n = 0; n < p->num_regexes; n++)
+ regfree(&p->regexes[n]);
+}
+/*
+ * Decide whether one subtitle packet is dropped.
+ *
+ * The payload is copied into a temporary string, the text field is located
+ * by skipping offset-1 commas, and the compiled regexes are tried in order.
+ * Returns NULL as soon as one regex matches, otherwise returns pkt itself.
+ * pkt is neither modified nor freed here.
+ */
+static struct demux_packet *rf_filter(struct sd_filter *ft,
+ struct demux_packet *pkt)
+{
+ struct priv *p = ft->priv;
+ char *line = bstrto0(NULL, (bstr){(char *)pkt->buffer, pkt->len}); // temporary copy used as a C string below (presumably NUL-terminated by bstrto0); freed at the end
+ bool drop = false;
+
+ char *text = line;
+ for (int n = 0; n < p->offset - 1; n++) { // step over the fields that precede the text
+ text = strchr(text, ',');
+ if (!text) {
+ MP_WARN(ft, "Malformed event: '%s'\n", line);
+ text = line; // shouldn't happen; random fallback
+ break;
+ }
+ text = text + 1; // move past the comma
+ }
+
+ for (int n = 0; n < p->num_regexes; n++) {
+ int err = regexec(&p->regexes[n], text, 0, NULL, 0); // no match offsets requested; the regexes were compiled with REG_NOSUB
+ if (err == 0) {
+ int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V; // rf_warn raises the level of the drop message
+ MP_MSG(ft, level, "Matching regex %d => drop: '%s'\n", n, text);
+ drop = true;
+ break; // first match wins; later regexes are not tried
+ } else if (err != REG_NOMATCH) {
+ MP_WARN(ft, "Error on regexec() on regex %d.\n", n); // reported, then matching continues with the next regex
+ }
+ }
+
+ talloc_free(line);
+ return drop ? NULL : pkt;
+}
+/*
+ * Entry points of the regex filter; referenced from the filters[] list in
+ * sub/sd_ass.c.
+ */
+const struct sd_filter_functions sd_filter_regex = {
+ .init = rf_init,
+ .uninit = rf_uninit,
+ .filter = rf_filter,
+};
diff --git a/sub/sd.h b/sub/sd.h
index 0d361edba6..a3085be864 100644
--- a/sub/sd.h
+++ b/sub/sd.h
@@ -87,5 +87,6 @@ struct sd_filter_functions {
};
extern const struct sd_filter_functions sd_filter_sdh;
+extern const struct sd_filter_functions sd_filter_regex; // defined in sub/filter_regex.c, which is only built with the "posix" dependency
#endif
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index 2b0cf13127..40e5093d8e 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -65,6 +65,9 @@ static void fill_plaintext(struct sd *sd, double pts);
static const struct sd_filter_functions *const filters[] = {
// Note: list order defines filter order.
&sd_filter_sdh,
+#if HAVE_POSIX
+ &sd_filter_regex, // listed after the SDH filter; sub/filter_regex.c is only built with the "posix" dependency
+#endif
NULL,
};
diff --git a/wscript_build.py b/wscript_build.py
index 488a81f57f..a95effca76 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -388,6 +388,7 @@ def build(ctx):
( "sub/ass_mp.c", "libass"),
( "sub/dec_sub.c" ),
( "sub/draw_bmp.c" ),
+ ( "sub/filter_regex.c", "posix" ),
( "sub/filter_sdh.c" ),
( "sub/img_convert.c" ),
( "sub/lavc_conv.c" ),