summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--DOCS/man/options.rst5
-rw-r--r--options/options.c1
-rw-r--r--options/options.h1
-rw-r--r--sub/filter_jsre.c134
-rw-r--r--sub/sd.h1
-rw-r--r--sub/sd_ass.c3
-rw-r--r--wscript_build.py1
7 files changed, 146 insertions, 0 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index 65626b80c0..c540400c74 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -2826,6 +2826,11 @@ Subtitles
include replacing the regexes with a very primitive and small subset of
sed, or some method to control case-sensitivity.
+``--sub-filter-jsre-...=...``
+ Same as ``--sub-filter-regex`` but with JavaScript regular expressions.
+ It is affected by all ``--sub-filter-regex-*`` control options (see below),
+ and is also experimental. Requires only JavaScript support.
+
``--sub-filter-regex-warn=<yes|no>``
Log dropped lines with warning log level, instead of verbose (default: no).
Helpful for testing.
diff --git a/options/options.c b/options/options.c
index fac31a2bb9..465ac9a35f 100644
--- a/options/options.c
+++ b/options/options.c
@@ -219,6 +219,7 @@ const struct m_sub_options mp_sub_filter_opts = {
{"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)},
{"sub-filter-regex-enable", OPT_FLAG(rf_enable)},
{"sub-filter-regex", OPT_STRINGLIST(rf_items)},
+ {"sub-filter-jsre", OPT_STRINGLIST(jsre_items)},
{"sub-filter-regex-warn", OPT_FLAG(rf_warn)},
{0}
},
diff --git a/options/options.h b/options/options.h
index 41aa88abb9..7963d6bd28 100644
--- a/options/options.h
+++ b/options/options.h
@@ -115,6 +115,7 @@ struct mp_sub_filter_opts {
int sub_filter_SDH_harder;
int rf_enable;
char **rf_items;
+ char **jsre_items;
int rf_warn;
};
diff --git a/sub/filter_jsre.c b/sub/filter_jsre.c
new file mode 100644
index 0000000000..896382714a
--- /dev/null
+++ b/sub/filter_jsre.c
@@ -0,0 +1,134 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <mujs.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+#include "options/options.h"
+#include "sd.h"
+
+
+// p_NAME are protected functions (never throw) which interact with the JS VM.
+// return 0 on successful interaction, not-0 on (caught) js-error.
+// on error: stack is the same as on entry + an error value
+
+// Compile `str` as a JS RegExp and store it as the global property named "<n>".
+// js: global[n] = new RegExp(str, flags)
+// Returns 0 on success. On a (caught) JS error returns 1, and an error value
+// is left on top of the stack for the caller to read and pop.
+static int p_regcomp(js_State *J, int n, const char *str, int flags)
+{
+    if (js_try(J)) {
+        // one of the calls below threw
+        return 1;
+    }
+
+    js_pushnumber(J, n);                    // n
+    js_newregexp(J, str, flags);            // n regex
+    const char *key = js_tostring(J, -2);   // n, converted to the property name
+    js_setglobal(J, key);                   // n  (global[n] is now the regex)
+    js_pop(J, 1);                           // (empty)
+
+    js_endtry(J);
+    return 0;
+}
+
+// Run the regex stored as global "<n>" against `text`.
+// js: found = global[n].test(text)
+// Returns 0 and stores the boolean result in *found on success. On a (caught)
+// JS error returns 1 and an error value is left on top of the stack; *found
+// is written only once test() has returned.
+static int p_regexec(js_State *J, int n, const char *text, int *found)
+{
+    if (js_try(J)) {
+        // one of the calls below threw
+        return 1;
+    }
+
+    js_pushnumber(J, n);                    // n
+    const char *key = js_tostring(J, -1);   // n, converted to the property name
+    js_getglobal(J, key);                   // n global[n]
+    js_getproperty(J, -1, "test");          // n global[n] test
+    js_rot2(J);                             // n test global[n]  (callee, then its `this')
+    js_pushstring(J, text);                 // n test global[n] text
+    js_call(J, 1);                          // n result
+    *found = js_toboolean(J, -1);
+    js_pop(J, 2);                           // drop the result and n
+
+    js_endtry(J);
+    return 0;
+}
+
+// Get a printable message for the error value on top of the stack.
+// Protected (does not throw). The value itself is not popped here, so the
+// caller should pop it once it is done with the returned string.
+static const char *get_err(js_State *J)
+{
+    const char *msg = js_trystring(J, -1, "unknown error");
+    return msg;
+}
+
+
+struct priv { // state of one jsre filter instance; allocated in jsre_init() and stored in ft->priv
+ js_State *J; // JS VM holding the compiled regexes; released by destruct_priv()
+ int num_regexes; // number of regexes which compiled OK; they are the JS globals 0..num_regexes-1
+ int offset; // result of sd_ass_fmt_offset(); passed to sd_ass_pkt_text() for every packet
+};
+
+// talloc destructor for struct priv: releases the JS VM it holds.
+static void destruct_priv(void *p)
+{
+    struct priv *priv = p;
+    js_freestate(priv->J);
+}
+
+// Set up the JS regex filter for a subtitle stream.
+//
+// Returns true only if the codec is "ass", regex filtering is enabled
+// (rf_enable) and at least one --sub-filter-jsre item compiled successfully;
+// returns false otherwise. Items which fail to compile are reported with
+// MP_ERR and skipped. All regexes are compiled with JS_REGEXP_I.
+static bool jsre_init(struct sd_filter *ft)
+{
+    if (strcmp(ft->codec, "ass") != 0)
+        return false;
+
+    if (!ft->opts->rf_enable)
+        return false;
+
+    // No jsre items configured: bail out before paying for a JS VM.
+    char **items = ft->opts->jsre_items;
+    if (!items || !items[0])
+        return false;
+
+    struct priv *p = talloc_zero(ft, struct priv);
+    ft->priv = p;
+
+    p->J = js_newstate(0, 0, JS_STRICT);
+    if (!p->J) {
+        MP_ERR(ft, "jsre: VM init error\n");
+        return false;
+    }
+    // From here on the VM is released whenever p is freed.
+    talloc_set_destructor(p, destruct_priv);
+
+    for (int n = 0; items[n]; n++) {
+        char *item = items[n];
+
+        // Compiled regexes are numbered by num_regexes rather than by n, so
+        // failed items leave no gaps and jsre_filter can walk 0..num_regexes-1.
+        int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
+        if (err) {
+            MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
+            js_pop(p->J, 1);  // drop the error value left by p_regcomp
+            continue;
+        }
+
+        p->num_regexes += 1;
+    }
+
+    if (!p->num_regexes)
+        return false;
+
+    p->offset = sd_ass_fmt_offset(ft->event_format);
+    return true;
+}
+
+// Filter one subtitle packet: returns NULL (drop) if the packet text matches
+// any of the compiled regexes, otherwise returns pkt itself.
+static struct demux_packet *jsre_filter(struct sd_filter *ft,
+                                        struct demux_packet *pkt)
+{
+    struct priv *p = ft->priv;
+    // NUL-terminated copy of the packet text; freed before returning.
+    char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
+    struct demux_packet *res = pkt;
+
+    for (int n = 0; n < p->num_regexes; n++) {
+        int found;
+        int err = p_regexec(p->J, n, text, &found);
+        if (err) {
+            // a failing regex is reported, and the remaining ones still run
+            MP_WARN(ft, "jsre: test regex %d: %s.\n", n, get_err(p->J));
+            js_pop(p->J, 1);
+            continue;
+        }
+        if (!found)
+            continue;
+
+        // first match wins: log it and stop testing
+        int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V;
+        MP_MSG(ft, level, "jsre: regex %d => drop: '%s'\n", n, text);
+        res = NULL;
+        break;
+    }
+
+    talloc_free(text);
+    return res;
+}
+
+const struct sd_filter_functions sd_filter_jsre = { // JS-regex subtitle filter; listed in filters[] in sub/sd_ass.c
+ .init = jsre_init, // setup; returns false unless at least one regex compiled
+ .filter = jsre_filter, // per-packet hook; returns NULL for packets to drop
+};
diff --git a/sub/sd.h b/sub/sd.h
index 2d107d1769..2e8d71ba79 100644
--- a/sub/sd.h
+++ b/sub/sd.h
@@ -88,6 +88,7 @@ struct sd_filter_functions {
extern const struct sd_filter_functions sd_filter_sdh;
extern const struct sd_filter_functions sd_filter_regex;
+extern const struct sd_filter_functions sd_filter_jsre;
// convenience utils for filters with ass codec
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index 0da6df41c8..e100b5c5e0 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -68,6 +68,9 @@ static const struct sd_filter_functions *const filters[] = {
#if HAVE_POSIX
&sd_filter_regex,
#endif
+#if HAVE_JAVASCRIPT
+ &sd_filter_jsre,
+#endif
NULL,
};
diff --git a/wscript_build.py b/wscript_build.py
index fbec5006f8..384bb50d2e 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -376,6 +376,7 @@ def build(ctx):
( "sub/dec_sub.c" ),
( "sub/draw_bmp.c" ),
( "sub/filter_regex.c", "posix" ),
+ ( "sub/filter_jsre.c", "javascript" ),
( "sub/filter_sdh.c" ),
( "sub/img_convert.c" ),
( "sub/lavc_conv.c" ),