summaryrefslogtreecommitdiffstats
path: root/TOOLS
diff options
context:
space:
mode:
authoralex <alex@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-06-26 12:35:35 +0000
committeralex <alex@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-06-26 12:35:35 +0000
commita8aec55f160dd129505c39c5970fe43959b2408a (patch)
tree95d761f82616a0a0a8ae5cde46ee024341e6d9a9 /TOOLS
parent731da20a8fd2d9163a550654a1670c0b1b7c719b (diff)
downloadmpv-a8aec55f160dd129505c39c5970fe43959b2408a.tar.bz2
mpv-a8aec55f160dd129505c39c5970fe43959b2408a.tar.xz
simple subtitle editor by Michael Klepikov
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12687 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'TOOLS')
-rwxr-xr-xTOOLS/subedit.pl445
1 files changed, 445 insertions, 0 deletions
diff --git a/TOOLS/subedit.pl b/TOOLS/subedit.pl
new file mode 100755
index 0000000000..c41d245c34
--- /dev/null
+++ b/TOOLS/subedit.pl
@@ -0,0 +1,445 @@
+#!/usr/bin/perl -w
+
+# A script for pipelined editing of subtitle files.
+# Copyright (C) 2004 Michael Klepikov <mike72@mail.ru>
+#
+# Version 1.0 initial release 28-Mar-04
+#
+# Comments, suggestions -- send me a mail, but the recommended way is
+# to enhance/fix on your own and submit to the distribution;)
+# If you like, I can review the fixes.
+#
+# This script is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+# Retain original credits when modifying.
+#
+# This script is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+use Math::BigInt;
+
+# Subtitle format identifiers
+my ($FMT_UNKNOWN, $FMT_SRT) = (0, 1);
+
+# Values filled in from the command line; the ones left undefined here
+# stay undefined until the matching switch is parsed.
+my $DEBUG = 0;                          # -d: debug output on STDERR
+my ($inFormat, $outFormat);             # -if / -of
+my ($shiftMilli, $scaleMilli);          # -s / -c, in milliseconds
+my ($splitFromMilli, $splitToMilli);    # -f / -t, in milliseconds
+
+## Process command line
+# Switches are consumed from @ARGV one at a time, together with their value
+# where they take one. Any malformed or unrecognized argument ends the
+# script through usage (), which prints the message and exits.
+while (defined (my $argVal = shift)) {
+    if ($argVal eq "-d" || $argVal eq "--debug") {
+        $DEBUG = 1;
+    } elsif ($argVal eq "-if" || $argVal eq "--input-format") {
+        $inFormat = shift;
+        usage ("Must specify input format") if ! $inFormat;
+        if ($inFormat =~ /^srt/i) {
+            $inFormat = $FMT_SRT;
+        } else {
+            usage ("Invalid input format");
+        }
+    } elsif ($argVal eq "-of" || $argVal eq "--output-format") {
+        $outFormat = shift;
+        usage ("Must specify output format") if ! $outFormat;
+        if ($outFormat =~ /^srt/i) {
+            $outFormat = $FMT_SRT;
+        } else {
+            usage ("Invalid output format");
+        }
+    } elsif ($argVal eq "-s" || $argVal eq "--shift") {
+        my $argTime = shift;
+        if (! defined $argTime ||
+            ! defined ($shiftMilli = getTimeMillis ($argTime))) {
+            usage ("Invalid shift time value");
+        }
+    } elsif ($argVal eq "-c" || $argVal eq "--scale") {
+        my $argTime = shift;
+        if (! defined $argTime ||
+            ! defined ($scaleMilli = getTimeMillis ($argTime))) {
+            usage ("Invalid scale time value");
+        }
+    } elsif ($argVal eq "-f" || $argVal eq "--split-from") {
+        my $argTime = shift;
+        if (! defined $argTime ||
+            ! defined ($splitFromMilli = getTimeMillis ($argTime))) {
+            usage ("Invalid split start time value");
+        }
+    } elsif ($argVal eq "-t" || $argVal eq "--split-to") {
+        my $argTime = shift;
+        if (! defined $argTime ||
+            ! defined ($splitToMilli = getTimeMillis ($argTime))) {
+            usage ("Invalid split end time value");
+        }
+    } elsif ($argVal eq "-r" || $argVal eq "--renumber") {
+        # Listed in the usage text. writeSRT numbers subtitles with its own
+        # counter, so output is renumbered regardless and there is nothing
+        # to record; the switch is accepted instead of being rejected.
+    } elsif ($argVal eq "-h" || $argVal eq "--help") {
+        usage ();
+    } else {
+        usage ("Unrecognized argument $argVal");
+    }
+}
+
+# Input format defaults to SRT
+$inFormat = $FMT_SRT if (! defined $inFormat);
+# Output format defaults to the same as input
+$outFormat = $inFormat if (! defined $outFormat);
+
+## Read
+
+# Reference to the list of subtitles; each one is a hash with the keys
+# srtNumber, srtStartTime, srtEndTime and lines (see readSRT).
+my $subs;
+if ($inFormat == $FMT_SRT) {
+    $subs = readSRT (*STDIN);
+    printf STDERR ("Read %d SRT subs\n", scalar @{$subs}) if $DEBUG;
+    # Sort by start time. Both operands must be start times: comparing a
+    # start time with the other subtitle's end time is not a consistent
+    # ordering and leaves the list in an unpredictable order.
+    @{$subs} = sort {$a -> {srtStartTime} <=> $b -> {srtStartTime}} @{$subs};
+}
+
+## Transform
+# The steps always run in this order: shift, then split, then scale.
+
+# Shift every timing by a fixed amount
+if (defined $shiftMilli && 0 != $shiftMilli) {
+    if ($DEBUG) {
+        printf STDERR ("Shift: %d milliseconds\n", $shiftMilli);
+    }
+    shiftSRT ($subs, $shiftMilli);
+}
+
+# Keep only the requested time window; either boundary may be missing
+if (defined $splitFromMilli || defined $splitToMilli) {
+    if ($DEBUG) {
+        my ($printFrom, $printTo) =
+            map { defined $_ ? $_ : "-" } ($splitFromMilli, $splitToMilli);
+        printf STDERR ("Split: from $printFrom to $printTo\n");
+    }
+    splitSRT ($subs, $splitFromMilli, $splitToMilli);
+}
+
+# Scale so that the end of the last subtitle moves by $scaleMilli.
+# Nothing to do for an empty subtitle list.
+if (defined $scaleMilli && 0 != $scaleMilli && scalar @{$subs} - 1 >= 0) {
+    my $lastTimeOrig = $subs -> [-1] -> {srtEndTime};
+    # The old end time becomes the divisor, so it must not be zero
+    die "Cannot scale when last subtitle ends at 00:00:00,000"
+        if $lastTimeOrig == 0;
+    my $lastTimeScaled = $lastTimeOrig + $scaleMilli;
+    printf STDERR ("Scale: %d/%d\n", $lastTimeScaled, $lastTimeOrig) if $DEBUG;
+    scaleSRT ($subs, $lastTimeScaled, $lastTimeOrig);
+}
+
+## Write
+writeSRT (*STDOUT, $subs) if $outFormat == $FMT_SRT;
+
+# Close STDOUT explicitly, as the Perl manual recommends: buffered write
+# errors (full disc, etc.) only show up when the stream is closed.
+close (STDOUT) or die "Cannot close output stream: $!";
+
+exit 0;
+
+## Subroutines
+
+# Convert string time format to milliseconds.
+# Accepted form: [-][[hh:]mm:]ss[.fraction], e.g. SRT style "01:20:03.251";
+# "," is allowed instead of "." and surrounding whitespace is ignored.
+# The string is parsed right to left (fraction, seconds, minutes, hours,
+# sign); whatever has been recognized is cut off, and the value is valid
+# only if nothing is left at the end.
+# Works on a private copy of the argument, so the caller's $_ is untouched.
+# Returns the time in milliseconds (not rounded, so it can be fractional
+# when the fraction has more than three digits), or undef in case of a
+# format error, including an undefined argument or one without any digit.
+sub getTimeMillis
+{
+    my $str = shift;
+    my $millis = 0;
+
+    return undef if ! defined $str;
+
+    # Strip surrounding spaces once, so all forms are treated alike
+    $str =~ s/^\s+//;
+    $str =~ s/\s+$//;
+
+    # "", "." or "-" carry no time value at all
+    return undef if $str !~ /[0-9]/;
+
+    if ($str =~ /(.*)[\.,]([0-9]+)?$/) { # Fraction
+        my ($rest, $frac) = ($1, $2);
+        $str = $rest;
+        $millis += ("0." . $frac) * 1000 if $frac;
+    }
+    if ($str =~ /(.*?)([0-9]+)$/) { # Seconds
+        my ($rest, $secs) = ($1, $2);
+        $str = $rest;
+        $millis += $secs * 1000 if $secs;
+    }
+    if ($str =~ /(.*?)([0-9]+):$/) { # Minutes
+        my ($rest, $mins) = ($1, $2);
+        $str = $rest;
+        $millis += $mins * 60000 if $mins;
+    }
+    if ($str =~ /(.*?)([0-9]+):$/) { # Hours
+        my ($rest, $hours) = ($1, $2);
+        $str = $rest;
+        $millis += $hours * 3600000 if $hours;
+    }
+    if ($str =~ /(.*?)\-$/) { # Minus sign
+        $str = $1;
+        $millis *= -1;
+    }
+    $millis = undef if ($str !~ /^$/); # Make sure we ate everything up
+    return $millis;
+}
+
+# Format a millisecond count as an SRT time stamp "hh:mm:ss,mmm".
+# A negative value is printed as its absolute value with a leading "-".
+# The hours field is at least two digits wide and is not capped.
+sub getTimeSRT
+{
+    my ($rest) = @_;
+
+    my $sign = "";
+    if ($rest < 0) {
+        $sign = "-";
+        $rest = -$rest;
+    }
+
+    # Peel off milliseconds, seconds and minutes in turn; "%" works on the
+    # integer part of its operands, what is left at the end is the hours.
+    my @fields;
+    foreach my $radix (1000, 60, 60) {
+        push @fields, $rest % $radix;
+        $rest /= $radix;
+    }
+    my ($milli, $sec, $min) = @fields;
+
+    return sprintf ("%s%02d:%02d:%02d,%03d",
+                    $sign, $rest, $min, $sec, $milli);
+}
+
+# Read SRT subtitles from a file handle (the script passes *STDIN).
+# Expected layout per subtitle: a number line, a timing line
+# "start --> end", one or more text lines, then an empty line or EOF.
+# Returns a reference to a list of hashes with the keys:
+#   srtNumber    - the number found in the input
+#   srtStartTime - start time in milliseconds (see getTimeMillis)
+#   srtEndTime   - end time in milliseconds
+#   lines        - reference to the list of text lines, trailing
+#                  whitespace removed
+# Dies on malformed input, naming the line number.
+# The current line lives in a lexical variable, because getTimeMillis and
+# other helpers may overwrite the global $_.
+sub readSRT
+{
+    my $in = shift;
+    my $subs = [];
+
+    my $line = <$in>;
+    print STDERR "Undefined first line\n" if ! defined $line && $DEBUG;
+    # A UTF-8 byte order mark (raw bytes or already decoded) in front of
+    # the first subtitle number would otherwise be reported as bad format
+    $line =~ s/^(?:\xEF\xBB\xBF|\x{FEFF})// if defined $line;
+    my $lineNo = 1;
+  READ_SUBS:
+    while (defined $line) {
+        # Each loop iteration reads one subtitle from the input
+        my $sub = {};
+
+        # Skip empty lines
+        while ($line =~ /^\s*$/) {
+            $line = <$in>;
+            last READ_SUBS if ! defined $line;
+            ++$lineNo;
+        }
+
+        # Subtitle number
+        if ($line =~ /^\s*([0-9]+)\s*$/) {
+            $sub -> {srtNumber} = $1;
+        } else {
+            die "Invalid SRT format at line $lineNo";
+        }
+
+        # Timing
+        $line = <$in>;
+        die "Unexpected end of SRT stream at line $lineNo" if ! defined $line;
+        ++$lineNo;
+        if ($line =~ /^\s*(\S+)\s*--\>\s*(\S+)\s*$/) {
+            my ($startText, $endText) = ($1, $2);
+            my $startMillis = getTimeMillis ($startText);
+            my $endMillis = getTimeMillis ($endText);
+            die "Invalid SRT timing format at line $lineNo: $line"
+                if ! defined $startMillis || ! defined $endMillis;
+            $sub -> {srtStartTime} = $startMillis;
+            $sub -> {srtEndTime} = $endMillis;
+        } else {
+            die "Invalid SRT timing format at line $lineNo: $line";
+        }
+
+        # Text lines. EOF is tested with "defined": a last line "0" without
+        # a newline is false in boolean context but is still a text line.
+        my $subLines = [];
+        while (defined ($line = <$in>)) { # EOF ends subtitle
+            ++$lineNo;
+            last if $line =~ /^\s*$/; # Empty line ends subtitle
+            $line =~ s/\s+$//; # Strip trailing spaces
+            push @{$subLines}, $line;
+        }
+        die "No text in SRT subtitle at line $lineNo" if 0 == scalar @{$subLines};
+        $sub -> {lines} = $subLines;
+
+        # Append subtitle to the list
+        push @{$subs}, $sub;
+    }
+    print STDERR "SRT read ok, $lineNo lines\n" if $DEBUG;
+
+    return $subs;
+}
+
+# Write subtitles in SRT format to a file handle (the script passes
+# *STDOUT). Subtitles are numbered from 1 in list order; the number stored
+# in each subtitle is not used. Every subtitle is followed by an empty line.
+sub writeSRT
+{
+    use integer; # For integer division
+    my ($out, $subs) = @_;
+
+    my $written = 0;
+    foreach my $sub (@{$subs}) {
+        ++$written;
+
+        my ($startText, $endText) =
+            map { getTimeSRT ($sub -> {$_}) } qw(srtStartTime srtEndTime);
+        printf {$out} ("%d\n%s --> %s\n", $written, $startText, $endText);
+        printf {$out} ("%s\n", $_) foreach @{$sub -> {lines}};
+        printf {$out} "\n";
+    }
+    printf STDERR ("Wrote %d SRT subs\n", $written) if $DEBUG;
+}
+
+# Shift SRT subtitles by a given number of milliseconds, in place.
+# The amount may be negative; a fractional amount is truncated to whole
+# milliseconds by the integer arithmetic in effect here.
+sub shiftSRT
+{
+    use integer; # $shiftMilli could be passed as float
+    my ($subs, $shiftMilli) = @_;
+
+    foreach my $sub (@{$subs}) {
+        foreach my $field (qw(srtStartTime srtEndTime)) {
+            $sub -> {$field} += $shiftMilli;
+        }
+    }
+}
+
+# Multiply each subtitle timing by a dividend and divide by a divisor,
+# in place.
+# The idea is that the dividend is usually the new total number of
+# milliseconds in the subtitle file, and the divisor is the old
+# total number of milliseconds in the subtitle file.
+# We could simply use a double precision real coefficient instead of
+# integer dividend and divisor, and that could be good enough, but
+# using integer arithmetics *guarantees* precision up to the last
+# digit, so why settle for good enough when we can have a guarantee.
+#
+# Uses Math::BigInt arithmetics, because it works with numbers
+# up to (total number of milliseconds for a subtitle timing)^2,
+# which could be on the order of approximately 1e+13, which is
+# larger than maximum 32-bit integer.
+# There is a performance loss when using BigInt vs. regular floating
+# point arithmetics, but the actual performance is quite acceptable
+# on files with a few thousand subtitles.
+#
+# Dividend, divisor and timings are truncated to whole milliseconds with
+# int () first: Math::BigInt turns a fractional value into NaN, and
+# "use integer" has no effect on values handed to its methods.
+# Dies if the (truncated) divisor is zero.
+# Results are stored as plain decimal strings.
+sub scaleSRT
+{
+    my $subs = shift;
+    my $scaleDividend = int (shift);
+    my $scaleDivisor = int (shift);
+
+    die "Cannot scale: divisor is zero" if $scaleDivisor == 0;
+
+    foreach my $sub (@{$subs}) {
+        foreach my $field (qw(srtStartTime srtEndTime)) {
+            my $scaled = Math::BigInt -> new (int ($sub -> {$field}));
+            $scaled = $scaled -> bmul ($scaleDividend);
+            $sub -> {$field} = $scaled -> bdiv ($scaleDivisor) -> bstr ();
+        }
+    }
+}
+
+# Extract a fragment within a given time interval, in place.
+# Either "from" or "to" may be undefined, meaning no boundary on that side.
+# A subtitle is dropped if it ends before "from" or starts after "to";
+# a subtitle that overlaps a boundary is kept and its timing is cut back
+# to that boundary.
+# The list is rebuilt in a single pass and the global $_ is left alone.
+sub splitSRT
+{
+    use integer; # fromMilli and toMilli could be passed as floats, truncate
+    my ($subs, $fromMilli, $toMilli) = @_;
+
+    my @kept;
+    foreach my $sub (@{$subs}) {
+        # Drop subtitles that end earlier than the start boundary
+        next if defined $fromMilli && $sub -> {srtEndTime} < $fromMilli;
+
+        # Fix overlapping start timing,
+        # but only if the start boundary is not infinite (undef)
+        if (defined $fromMilli && $sub -> {srtStartTime} < $fromMilli) {
+            $sub -> {srtStartTime} = $fromMilli;
+        }
+
+        # Drop subtitles that begin later than the end boundary
+        next if defined $toMilli && $sub -> {srtStartTime} > $toMilli;
+
+        # Fix overlapping end timing,
+        # but only if the end boundary is not infinite (undef)
+        if (defined $toMilli && $sub -> {srtEndTime} > $toMilli) {
+            $sub -> {srtEndTime} = $toMilli;
+        }
+
+        # All conditions met, all fixes done
+        push @kept, $sub;
+    }
+    @{$subs} = @kept;
+}
+
+# Print an optional error message and the usage help to STDERR, then exit.
+# Without a message (plain help request) the exit code is 0; with one,
+# e.g. for errors parsing the command line, it is 2. Never returns.
+sub usage
+{
+    my ($msg) = @_;
+
+    my $exitCode = defined $msg ? 2 : 0;
+    print STDERR "$msg\n" if defined $msg;
+
+    print STDERR <<USAGE;
+Usage: $0 [switches]
+ -if,--input-format <fmt> input format; supported: SRT
+ default is SRT
+ -of,--output-format <fmt> output format; supported: SRT
+ default is same as input format
+ -s,--shift <time> shift all subtitles by <time>
+ (format: [-]hh:mm:ss,fraction)
+ -c,--scale <time> scale by adding <time> to overall duration
+ -f,--split-from <time> Drop subtitles that end before <time>
+ -t,--split-to <time> Drop subtitles that start after <time>
+ (will truncate timing if it overlaps a boundary)
+ -r,--renumber renumber SRT subtitles in output
+ -d,--debug enable debug output
+ -h,--help this help message
+
+All times could be negative. Input/output may also contain negative timings,
+which is sometimes useful for intermediate results.
+SRT subtitles are always renumbered on output.
+
+EXAMPLES
+
+Split subtitle file into two disks at a boundary of one hour 15 minutes:
+
+ subedit.pl --split-to 1:15:0 < all.srt > p1.srt
+ subedit.pl -f 1:15:0 < all.srt | subedit.pl --shift -1:15:0 > p2.srt
+
+Join the previous two disks back into one file:
+
+ subedit.pl -s 1:15:00 < p2.srt | cat p1.srt - | subedit.pl > all.srt
+
+Correct a situation where the first subtitle starts in sync with the video,
+but the last one starts 3.5 seconds earlier than the speech in the video,
+assuming the first subtitle timing is 00:01:05.030:
+
+ subedit.pl -s -1:5.03 | subedit.pl -c 3.5 | subedit.pl -s 1:5.03
+USAGE
+
+    exit $exitCode;
+}