summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAvi Halachmi (:avih) <avihpit@yahoo.com>2021-07-23 19:11:23 +0300
committeravih <avih@users.noreply.github.com>2021-08-05 21:32:22 +0300
commit7c264950c0bff588f6852c461c26b37a550a5abb (patch)
treee76d7b54f82e17d709b82c264429069b011cb5dc
parentd82a0730690f52f35b14da591564855edd361e34 (diff)
downloadmpv-7c264950c0bff588f6852c461c26b37a550a5abb.tar.bz2
mpv-7c264950c0bff588f6852c461c26b37a550a5abb.tar.xz
sub: new: --sub-filter-jsre (js regex)
Pretty much identical to filter-regex but with JS expressions and requires only JS support. Shares the filter-regex-* control options. The target audience is Windows users - where filter-regex doesn't work due to missing APIs, but mujs builds cleanly on Windows, and JS is usually enabled in 3rd party Windows mpv builds. Lua could have been used with similar effort, however, the JS regex syntax is more extensive and also much more similar to POSIX.
-rw-r--r--DOCS/man/options.rst5
-rw-r--r--options/options.c1
-rw-r--r--options/options.h1
-rw-r--r--sub/filter_jsre.c134
-rw-r--r--sub/sd.h1
-rw-r--r--sub/sd_ass.c3
-rw-r--r--wscript_build.py1
7 files changed, 146 insertions, 0 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 65626b80c0..c540400c74 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -2826,6 +2826,11 @@ Subtitles
include replacing the regexes with a very primitive and small subset of
sed, or some method to control case-sensitivity.
+``--sub-filter-jsre-...=...``
+ Same as ``--sub-filter-regex`` but with JavaScript regular expressions.
+ It is controlled by the same ``--sub-filter-regex-*`` options (see below),
+ and is likewise experimental. Requires only JavaScript support.
+
``--sub-filter-regex-warn=<yes|no>``
Log dropped lines with warning log level, instead of verbose (default: no).
Helpful for testing.
diff --git a/options/options.c b/options/options.c
index fac31a2bb9..465ac9a35f 100644
--- a/options/options.c
+++ b/options/options.c
@@ -219,6 +219,7 @@ const struct m_sub_options mp_sub_filter_opts = {
{"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)},
{"sub-filter-regex-enable", OPT_FLAG(rf_enable)},
{"sub-filter-regex", OPT_STRINGLIST(rf_items)},
+ {"sub-filter-jsre", OPT_STRINGLIST(jsre_items)},
{"sub-filter-regex-warn", OPT_FLAG(rf_warn)},
{0}
},
diff --git a/options/options.h b/options/options.h
index 41aa88abb9..7963d6bd28 100644
--- a/options/options.h
+++ b/options/options.h
@@ -115,6 +115,7 @@ struct mp_sub_filter_opts {
int sub_filter_SDH_harder;
int rf_enable;
char **rf_items;
+ char **jsre_items;
int rf_warn;
};
diff --git a/sub/filter_jsre.c b/sub/filter_jsre.c
new file mode 100644
index 0000000000..896382714a
--- /dev/null
+++ b/sub/filter_jsre.c
@@ -0,0 +1,134 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <mujs.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+#include "options/options.h"
+#include "sd.h"
+
+
+// p_NAME are protected functions (never throw) which interact with the JS VM.
+// return 0 on successful interaction, not-0 on (caught) js-error.
+// on error: stack is the same as on entry + an error value
+
+// js: global[n] = new RegExp(str, flags)
+// Builds a RegExp from `str` with the given mujs regex `flags` and stores it
+// as a global property whose name is the string form of `n`.
+// Returns 0 on success. Returns 1 if js_try caught a JS error; per the p_NAME
+// contract the stack is then as on entry plus the error value on top, which
+// the caller is expected to read and pop.
+static int p_regcomp(js_State *J, int n, const char *str, int flags)
+{
+ if (js_try(J))
+ return 1;
+
+ js_pushnumber(J, n); // n
+ js_newregexp(J, str, flags); // n regex
+ js_setglobal(J, js_tostring(J, -2)); // n (and global[n] is the regex)
+ js_pop(J, 1);
+
+ js_endtry(J);
+ return 0;
+}
+
+// js: found = global[n].test(text)
+// Looks up the global named by the string form of `n`, calls its `test`
+// method with `text` as the only argument, and stores the boolean result in
+// `*found`.
+// Returns 0 on success. Returns 1 if js_try caught a JS error; `*found` is
+// then not written, and per the p_NAME contract the error value is left on
+// top of the stack for the caller to read and pop.
+static int p_regexec(js_State *J, int n, const char *text, int *found)
+{
+ if (js_try(J))
+ return 1;
+
+ js_pushnumber(J, n); // n
+ js_getglobal(J, js_tostring(J, -1)); // n global[n]
+ js_getproperty(J, -1, "test"); // n global[n] global[n].test
+ js_rot2(J); // n global[n].test global[n] (n, test(), and its `this')
+ js_pushstring(J, text); // n global[n].test global[n] text
+ js_call(J, 1); // n test-result
+ *found = js_toboolean(J, -1);
+ js_pop(J, 2); // the result and n
+
+ js_endtry(J);
+ return 0;
+}
+
+// Protected (does not throw). Returns the string form of the value on top of
+// the JS stack, or "unknown error" if it cannot be converted. The value is
+// left on the stack: the caller pops it once it is done with the string.
+static const char *get_err(js_State *J)
+{
+    const char *msg = js_trystring(J, -1, "unknown error");
+    return msg;
+}
+
+
+// Per-filter state, allocated in jsre_init and stored in ft->priv.
+struct priv {
+ js_State *J; // JS VM created by js_newstate; freed by destruct_priv
+ int num_regexes; // number of regexes that compiled; they are globals 0..num_regexes-1
+ int offset; // result of sd_ass_fmt_offset(), passed to sd_ass_pkt_text()
+};
+
+// talloc destructor for struct priv: frees the JS VM it holds.
+static void destruct_priv(void *p)
+{
+    struct priv *priv = p;
+    js_freestate(priv->J);
+}
+
+// Filter init: creates a JS VM and compiles every --sub-filter-jsre item as a
+// regex with the JS_REGEXP_I flag.
+// Returns false (the filter is not used) when the codec is not "ass", when
+// sub-filter-regex-enable is off, when no jsre items are configured, when the
+// VM cannot be created, or when none of the items compiled. Returns true
+// otherwise, with ft->priv pointing at the initialized state.
+static bool jsre_init(struct sd_filter *ft)
+{
+    if (strcmp(ft->codec, "ass") != 0)
+        return false;
+
+    if (!ft->opts->rf_enable)
+        return false;
+
+    // No patterns configured: bail out before allocating state or paying for
+    // a JS VM that would be thrown away immediately.
+    char **items = ft->opts->jsre_items;
+    if (!items || !items[0])
+        return false;
+
+    struct priv *p = talloc_zero(ft, struct priv);
+    ft->priv = p;
+
+    p->J = js_newstate(0, 0, JS_STRICT);
+    if (!p->J) {
+        MP_ERR(ft, "jsre: VM init error\n");
+        return false;
+    }
+    // Registered only once there is a VM, so the destructor never sees NULL.
+    talloc_set_destructor(p, destruct_priv);
+
+    for (int n = 0; items[n]; n++) {
+        char *item = items[n];
+
+        // Compiled regexes are numbered densely: an item that fails to
+        // compile is reported and skipped without consuming an index.
+        int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
+        if (err) {
+            MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
+            js_pop(p->J, 1); // drop the error value left by p_regcomp
+            continue;
+        }
+
+        p->num_regexes += 1;
+    }
+
+    if (!p->num_regexes)
+        return false;
+
+    p->offset = sd_ass_fmt_offset(ft->event_format);
+    return true;
+}
+
+// Filter callback: tests the packet's text against each compiled regex in
+// order and drops the packet on the first match.
+// Returns NULL to drop the packet, or `pkt` itself to keep it. A regex whose
+// evaluation raises a JS error is logged as a warning and skipped.
+static struct demux_packet *jsre_filter(struct sd_filter *ft,
+                                        struct demux_packet *pkt)
+{
+    struct priv *p = ft->priv;
+    char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
+    struct demux_packet *result = pkt;
+
+    for (int idx = 0; idx < p->num_regexes; idx++) {
+        int found;
+        if (p_regexec(p->J, idx, text, &found)) {
+            MP_WARN(ft, "jsre: test regex %d: %s.\n", idx, get_err(p->J));
+            js_pop(p->J, 1); // drop the error value left by p_regexec
+            continue;
+        }
+        if (!found)
+            continue;
+
+        // sub-filter-regex-warn raises the log level of dropped lines.
+        int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V;
+        MP_MSG(ft, level, "jsre: regex %d => drop: '%s'\n", idx, text);
+        result = NULL;
+        break;
+    }
+
+    talloc_free(text);
+    return result;
+}
+
+// Entry points of the JS-regex subtitle filter. Declared extern in sd.h and
+// listed in the filters[] table in sd_ass.c when HAVE_JAVASCRIPT is set.
+const struct sd_filter_functions sd_filter_jsre = {
+ .init = jsre_init,
+ .filter = jsre_filter,
+};
diff --git a/sub/sd.h b/sub/sd.h
index 2d107d1769..2e8d71ba79 100644
--- a/sub/sd.h
+++ b/sub/sd.h
@@ -88,6 +88,7 @@ struct sd_filter_functions {
extern const struct sd_filter_functions sd_filter_sdh;
extern const struct sd_filter_functions sd_filter_regex;
+extern const struct sd_filter_functions sd_filter_jsre;
// convenience utils for filters with ass codec
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index 0da6df41c8..e100b5c5e0 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -68,6 +68,9 @@ static const struct sd_filter_functions *const filters[] = {
#if HAVE_POSIX
&sd_filter_regex,
#endif
+#if HAVE_JAVASCRIPT
+ &sd_filter_jsre,
+#endif
NULL,
};
diff --git a/wscript_build.py b/wscript_build.py
index fbec5006f8..384bb50d2e 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -376,6 +376,7 @@ def build(ctx):
( "sub/dec_sub.c" ),
( "sub/draw_bmp.c" ),
( "sub/filter_regex.c", "posix" ),
+ ( "sub/filter_jsre.c", "javascript" ),
( "sub/filter_sdh.c" ),
( "sub/img_convert.c" ),
( "sub/lavc_conv.c" ),