summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Avi Halachmi (:avih) <avihpit@yahoo.com>, 2021-07-23 20:31:15 +0300
committer: avih <avih@users.noreply.github.com>, 2021-08-05 21:32:22 +0300
commit: 41650203c32e179e5f3cf89e176ef6caccba05d9 (patch)
tree: 21bb2a4682f33be0ff99f52a3ba3efe847b4d1cb
parent: 7c264950c0bff588f6852c461c26b37a550a5abb (diff)
downloadmpv-41650203c32e179e5f3cf89e176ef6caccba05d9.tar.bz2
mpv-41650203c32e179e5f3cf89e176ef6caccba05d9.tar.xz
sub: sub-filter-regex and jsre: support ass-to-plaintext
Enabled using --sub-filter-regex-plain (default: no). The ass-to-plaintext functionality already existed in sd_ass.c, but it's internal and uses a private buffer type, so a trivial utility wrapper was added with a standard char*/bstr interface. The plaintext can be multi-line, and the multi-line regexp flag is now always set, but it only affects plaintext (the ASS source is one line).
-rw-r--r--DOCS/man/options.rst8
-rw-r--r--options/options.c1
-rw-r--r--options/options.h1
-rw-r--r--sub/filter_jsre.c5
-rw-r--r--sub/filter_regex.c5
-rw-r--r--sub/sd.h5
-rw-r--r--sub/sd_ass.c9
7 files changed, 31 insertions, 3 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index c540400c74..c08161f338 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -2804,7 +2804,7 @@ Subtitles
List items are matched in order. If a regular expression matches, the
process is stopped, and the subtitle line is discarded. The text matched
- against is, currently, always the ``Text`` field of ASS events (if the
+ against is, by default, the ``Text`` field of ASS events (if the
subtitle format is different, it is always converted). This may include
formatting tags. Matching is case-insensitive, but how this is done depends
on the libc, and most likely works in ASCII only. It does not work on
@@ -2831,6 +2831,12 @@ Subtitles
Shares/affected-by all ``--sub-filter-regex-*`` control options (see below),
and also experimental. Requires only JavaScript support.
+``--sub-filter-regex-plain=<yes|no>``
+ Whether to first convert the ASS "Text" field to plain-text (default: no).
+ This strips ASS tags and applies ASS directives, such as converting ``\N``
+ to a new-line. If the result is multi-line, then the regexp anchors ``^``
+ and ``$`` match at each line, but any match still discards all lines.
+
``--sub-filter-regex-warn=<yes|no>``
Log dropped lines with warning log level, instead of verbose (default: no).
Helpful for testing.
diff --git a/options/options.c b/options/options.c
index 465ac9a35f..2f8885e0ba 100644
--- a/options/options.c
+++ b/options/options.c
@@ -218,6 +218,7 @@ const struct m_sub_options mp_sub_filter_opts = {
{"sub-filter-sdh", OPT_FLAG(sub_filter_SDH)},
{"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)},
{"sub-filter-regex-enable", OPT_FLAG(rf_enable)},
+ {"sub-filter-regex-plain", OPT_FLAG(rf_plain)},
{"sub-filter-regex", OPT_STRINGLIST(rf_items)},
{"sub-filter-jsre", OPT_STRINGLIST(jsre_items)},
{"sub-filter-regex-warn", OPT_FLAG(rf_warn)},
diff --git a/options/options.h b/options/options.h
index 7963d6bd28..f3c8e318e8 100644
--- a/options/options.h
+++ b/options/options.h
@@ -114,6 +114,7 @@ struct mp_sub_filter_opts {
int sub_filter_SDH;
int sub_filter_SDH_harder;
int rf_enable;
+ int rf_plain;
char **rf_items;
char **jsre_items;
int rf_warn;
diff --git a/sub/filter_jsre.c b/sub/filter_jsre.c
index 896382714a..af4fbbeba3 100644
--- a/sub/filter_jsre.c
+++ b/sub/filter_jsre.c
@@ -87,7 +87,7 @@ static bool jsre_init(struct sd_filter *ft)
for (int n = 0; ft->opts->jsre_items && ft->opts->jsre_items[n]; n++) {
char *item = ft->opts->jsre_items[n];
- int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
+ int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I | JS_REGEXP_M);
if (err) {
MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
js_pop(p->J, 1);
@@ -111,6 +111,9 @@ static struct demux_packet *jsre_filter(struct sd_filter *ft,
char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
bool drop = false;
+ if (ft->opts->rf_plain)
+ sd_ass_to_plaintext(text, strlen(text), text);
+
for (int n = 0; n < p->num_regexes; n++) {
int found, err = p_regexec(p->J, n, text, &found);
if (err == 0 && found) {
diff --git a/sub/filter_regex.c b/sub/filter_regex.c
index 66e4b1a1da..8e299918ce 100644
--- a/sub/filter_regex.c
+++ b/sub/filter_regex.c
@@ -30,7 +30,7 @@ static bool rf_init(struct sd_filter *ft)
MP_TARRAY_GROW(p, p->regexes, p->num_regexes);
regex_t *preg = &p->regexes[p->num_regexes];
- int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB);
+ int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB | REG_NEWLINE);
if (err) {
char errbuf[512];
regerror(err, preg, errbuf, sizeof(errbuf));
@@ -63,6 +63,9 @@ static struct demux_packet *rf_filter(struct sd_filter *ft,
char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
bool drop = false;
+ if (ft->opts->rf_plain)
+ sd_ass_to_plaintext(text, strlen(text), text);
+
for (int n = 0; n < p->num_regexes; n++) {
int err = regexec(&p->regexes[n], text, 0, NULL, 0);
if (err == 0) {
diff --git a/sub/sd.h b/sub/sd.h
index 2e8d71ba79..6801a383b7 100644
--- a/sub/sd.h
+++ b/sub/sd.h
@@ -101,4 +101,9 @@ int sd_ass_fmt_offset(const char *event_format);
// on malformed event: warns and returns (bstr){NULL,0}
bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset);
+// convert \0-terminated "Text" (ass) content to plaintext, possibly in-place.
+// result.start is out, result.len is MIN(out_siz, strlen(in)) or smaller.
+// if there's room: out[result.len] is set to \0. out == in is allowed.
+bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in);
+
#endif
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index e100b5c5e0..939c000156 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -973,3 +973,12 @@ bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset)
}
return txt;
}
+
+// Public wrapper around the file-internal ass_to_plaintext(): describes the
+// caller's buffer (out, capacity out_siz) as a struct buf, runs the conversion
+// of "in" into it, and hands the result back as a bstr over "out".
+// The output is \0-terminated only when the produced length is below out_siz.
+bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in)
+{
+    struct buf dst = {out, out_siz, 0};
+
+    ass_to_plaintext(&dst, in);
+
+    // Terminate only if a byte is left over after the converted text.
+    if (dst.len < out_siz)
+        out[dst.len] = 0;
+
+    return (bstr){out, dst.len};
+}