summaryrefslogtreecommitdiffstats
path: root/sub/filter_sdh.c
diff options
context:
space:
mode:
authorDudemanguy <random342@airmail.cc>2023-11-05 12:16:48 -0600
committerDudemanguy <random342@airmail.cc>2023-12-08 18:14:06 +0000
commitb7d85f0d4a5330cb3f433cd0cb4c977e10a168f7 (patch)
tree8072f3f11fecbc91f786c5b93869cbf515cc2afe /sub/filter_sdh.c
parent2193893be7ed5d9ea5089552b5c2e18c8ffb0f84 (diff)
downloadmpv-b7d85f0d4a5330cb3f433cd0cb4c977e10a168f7.tar.bz2
mpv-b7d85f0d4a5330cb3f433cd0cb4c977e10a168f7.tar.xz
filter_sdh: combine skip_bracketed and skip_parenthesized
These two functions are almost exactly the same. The parenthesis variant is essentially just a special case with more conditions under which text is not removed. They can easily be combined into one generic skip_enclosed function that handles both cases. We also use char * instead of char for the character comparison here, since not every symbol is necessarily 1 byte wide and able to fit into a char. This will be useful for the following commits, where we extend this logic further.
Diffstat (limited to 'sub/filter_sdh.c')
-rw-r--r--sub/filter_sdh.c95
1 files changed, 28 insertions, 67 deletions
diff --git a/sub/filter_sdh.c b/sub/filter_sdh.c
index 69fca9f892..a3dddfc2b5 100644
--- a/sub/filter_sdh.c
+++ b/sub/filter_sdh.c
@@ -86,7 +86,8 @@ static void copy_ass(struct sd_filter *sd, char **rpp, struct buffer *buf)
return;
}
-static bool skip_bracketed(struct sd_filter *sd, char **rpp, struct buffer *buf);
+static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
+ const char *left, const char *right);
// check for speaker label, like MAN:
// normal subtitles may include mixed case text with : after so
@@ -128,7 +129,7 @@ static void skip_speaker_label(struct sd_filter *sd, char **rpp, struct buffer *
copy_ass(sd, &rp, buf);
} else if (rp[0] == '[') {
// not uncommon with [xxxx]: which should also be skipped
- if (!skip_bracketed(sd, &rp, buf)) {
+ if (!skip_enclosed(sd, &rp, buf, "[", "]")) {
buf->pos = old_pos;
return;
}
@@ -174,94 +175,54 @@ static void skip_speaker_label(struct sd_filter *sd, char **rpp, struct buffer *
return;
}
-// check for bracketed text, like [SOUND]
-// and skip it while preserving ass tags
-// any characters are allowed, brackets are seldom used in normal text
+// Check for text enclosed in symbols, like (SOUND)
+// and skip it while preserving ass tags.
+// Parentheses are a special case since normal subtitles may have
+// them so only upper case is accepted and lower case l which for
+// some looks like upper case I. If sub_filter_SDH_harder is used,
+// both upper and lower case is accepted.
//
-// Parameters:
-// rpp read pointer pointer to source string, updated on return
-// buf write buffer
-//
-// scan in source string
-// the first character in source string must by the starting '['
-// and copy ass tags to destination string but
-// skipping bracketed text if it looks like SDH
-//
-// return true if bracketed text was removed.
-// if not valid SDH read pointer and write buffer position will be unchanged
-// otherwise they point to next position after text and next write position
-static bool skip_bracketed(struct sd_filter *sd, char **rpp, struct buffer *buf)
-{
- char *rp = *rpp;
- int old_pos = buf->pos;
-
- rp++; // skip past '['
- // skip past valid data searching for ]
- while (*rp && rp[0] != ']') {
- if (rp[0] == '{') {
- copy_ass(sd, &rp, buf);
- } else {
- rp++;
- }
- }
- if (!*rp) {
- // ] was not found
- buf->pos = old_pos;
- return false;
- }
- rp++; // skip ]
- // skip trailing spaces
- while (rp[0] == ' ') {
- rp++;
- }
- *rpp = rp;
-
- return true;
-}
-
-// check for parenthesized text, like (SOUND)
-// and skip it while preserving ass tags
-// normal subtitles may include mixed case text in parentheses so
-// only upper case is accepted and lower case l which for some
-// looks like upper case I but if requested harder filtering
-// both upper and lower case is accepted
+// For other symbols, all text in between is removed.
//
// Parameters:
// rpp read pointer pointer to source string, updated on return
// buf write buffer
//
// scan in source string
-// the first character in source string must be the starting '('
+// the first character in source string must be the starting left symbol
// and copy ass tags to destination string but
-// skipping parenthesized text if it looks like SDH
+// skipping enclosed text if it looks like SDH
//
-// return true if parenthesized text was removed.
+// return true if enclosed text was removed.
// if not valid SDH read pointer and write buffer position will be unchanged
// otherwise they point to next position after text and next write position
-static bool skip_parenthesized(struct sd_filter *sd, char **rpp, struct buffer *buf)
+static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
+ const char *left, const char *right)
{
- int filter_harder = sd->opts->sub_filter_SDH_harder;
+ bool filter_harder = sd->opts->sub_filter_SDH_harder;
char *rp = *rpp;
int old_pos = buf->pos;
- rp++; // skip past '('
- // skip past valid data searching for )
- bool only_digits = true;
- while (*rp && rp[0] != ')') {
+ rp++; // skip past the left character
+ // skip past valid data searching for the right character
+ bool only_digits = strcmp(left, "(") == 0;
+ while (*rp && rp[0] != right[0]) {
if (rp[0] == '{') {
copy_ass(sd, &rp, buf);
- } else if ((mp_isalpha(rp[0]) &&
+ } else if (strcmp(left, "(") == 0 && ((mp_isalpha(rp[0]) &&
(filter_harder || mp_isupper(rp[0]) || rp[0] == 'l')) ||
mp_isdigit(rp[0]) ||
rp[0] == ' ' || rp[0] == '\'' || rp[0] == '#' ||
rp[0] == '.' || rp[0] == ',' ||
- rp[0] == '-' || rp[0] == '"' || rp[0] == '\\') {
+ rp[0] == '-' || rp[0] == '"' || rp[0] == '\\')) {
if (!mp_isdigit(rp[0]))
only_digits = false;
rp++;
- } else {
+ } else if (strcmp(left, "(") == 0) {
buf->pos = old_pos;
return false;
+ } else {
+ rp++;
}
}
if (!*rp) {
@@ -274,7 +235,7 @@ static bool skip_parenthesized(struct sd_filter *sd, char **rpp, struct buffer *
buf->pos = old_pos;
return false;
}
- rp++; // skip )
+ rp++; // skip right character
// skip trailing spaces
while (rp[0] == ' ') {
rp++;
@@ -372,13 +333,13 @@ static char *filter_SDH(struct sd_filter *sd, char *data, int length, ptrdiff_t
while (*rp && !(rp[0] == '\\' && rp[1] == 'N')) {
copy_ass(sd, &rp, buf);
if (rp[0] == '[') {
- if (!skip_bracketed(sd, &rp, buf)) {
+ if (!skip_enclosed(sd, &rp, buf, "[", "]")) {
append(sd, buf, rp[0]);
rp++;
line_with_text = true;
}
} else if (rp[0] == '(') {
- if (!skip_parenthesized(sd, &rp, buf)) {
+ if (!skip_enclosed(sd, &rp, buf, "(", ")")) {
append(sd, buf, rp[0]);
rp++;
line_with_text = true;