summaryrefslogtreecommitdiffstats
path: root/TOOLS/lib/Parse/Matroska/Reader.pm
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2016-12-17 13:24:05 +0100
committerwm4 <wm4@nowhere>2016-12-17 15:43:15 +0100
commitff9f5e06ff203c055d968087956026ef9204218b (patch)
tree883725191398913cc97bd441b7110bcf68237e28 /TOOLS/lib/Parse/Matroska/Reader.pm
parent2b8b17402ed59815019b309051b277ba4de82f3b (diff)
downloadmpv-ff9f5e06ff203c055d968087956026ef9204218b.tar.bz2
mpv-ff9f5e06ff203c055d968087956026ef9204218b.tar.xz
Revert "Port several python scripts to Perl"
This reverts commit fae73079310eef9dce9737f2e37ff4b80c8830ee. Before the waf build system was used, we had a configure script written in shell. To drop the build dependency on Python, someone rewrote the Python scripts we had to Perl. Now the shell configure script is gone, and it makes no sense to have a build dependency on both Perl and Python. This isn't just a straight revert. It adds the new Matroska EBML elements to the old Python scripts, adjusts the waf build system, and of course doesn't add anything back needed by the old build system. It would be better if this used matroska.py/file2string.py directly by importing them as modules, instead of calling them via "python". But for now this is simpler.
Diffstat (limited to 'TOOLS/lib/Parse/Matroska/Reader.pm')
-rw-r--r--TOOLS/lib/Parse/Matroska/Reader.pm426
1 files changed, 0 insertions, 426 deletions
diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm
deleted file mode 100644
index 614b7b12c0..0000000000
--- a/TOOLS/lib/Parse/Matroska/Reader.pm
+++ /dev/null
@@ -1,426 +0,0 @@
-use 5.008;
-use strict;
-use warnings;
-
-# ABSTRACT: a low-level reader for EBML files
-package Parse::Matroska::Reader;
-
-use Parse::Matroska::Definitions qw{elem_by_hexid};
-use Parse::Matroska::Element;
-
-use Carp;
-use Scalar::Util qw{openhandle weaken};
-use IO::Handle;
-use IO::File;
-use List::Util qw{first};
-use Encode;
-
-use constant BIGINT_TRY => 'Pari,GMP,FastCalc';
-use Math::BigInt try => BIGINT_TRY;
-use Math::BigRat try => BIGINT_TRY;
-
-=head1 SYNOPSIS
-
- use Parse::Matroska::Reader;
- my $reader = Parse::Matroska::Reader->new($path);
- $reader->close;
- $reader->open(\$string_with_matroska_data);
-
- my $elem = $reader->read_element;
- print "Element ID: $elem->{elid}\n";
- print "Element name: $elem->{name}\n";
- if ($elem->{type} ne 'sub') {
- print "Element value: $elem->get_value\n";
- } else {
- while (my $child = $elem->next_child) {
- print "Child element: $child->{name}\n";
- }
- }
- $reader->close;
-
-=head1 DESCRIPTION
-
-Reads EBML data, which is used in Matroska files.
-This is a low-level reader which is meant to be used as a backend
-for higher level readers. TODO: write the high level readers :)
-
-=head1 NOTE
-
-The API of this module is not yet considered stable.
-
-=method new
-
-Creates a new reader.
-Calls L</open($arg)> with its arguments if provided.
-
-=cut
-sub new {
- my $class = shift;
- my $self = {};
- bless $self, $class;
-
- $self->open(@_) if @_;
- return $self;
-}
-
-=method open($arg)
-
-Creates the internal filehandle. The argument can be:
-
-=for :list
-* An open filehandle or L<IO::Handle> object.
-The filehandle is not C<dup()>ed, so calling L</close> in this
-object will close the given filehandle as well.
-* A scalar containing a path to a file.
-* On perl v5.14 or newer, a scalarref pointing to EBML data.
-For similar functionality in older perls, give an L<IO::String> object
-or the handle to an already C<open>ed scalarref.
-
-=cut
-sub open {
- my ($self, $arg) = @_;
- $self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw")
- or croak "Can't open $arg: $!";
-}
-
-=method close
-
-Closes the internal filehandle.
-
-=cut
-sub close {
- my ($self) = @_;
- $self->{fh}->close;
- delete $self->{fh};
-}
-
-# equivalent to $self->readlen(1), possibly faster
-sub _getc {
- my ($self) = @_;
- my $c = $self->{fh}->getc;
- croak "Can't do read of length 1: $!" if !defined $c && $!;
- return $c;
-}
-
-=method readlen($length)
-
-Reads C<$length> bytes from the internal filehandle.
-
-=cut
-sub readlen {
- my ($self, $len) = @_;
- my $data;
- my $readlen = $self->{fh}->read($data, $len);
- croak "Can't do read of length $len: $!"
- unless defined $readlen;
- return $data;
-}
-
-# converts a byte string into an integer
-# we do so by converting the integer into a hex string (big-endian)
-# and then reading the hex-string into an integer
-sub _bin2int($) {
- my ($bin) = @_;
- # if the length is larger than 3
- # the resulting integer might be larger than INT_MAX
- if (length($bin) > 3) {
- return Math::BigInt->from_hex(unpack("H*", $bin));
- }
- return hex(unpack("H*", $bin));
-}
-
-# creates a floating-point number with the given mantissa and exponent
-sub _ldexp {
- my ($mantissa, $exponent) = @_;
- my $r = new Math::BigRat($mantissa);
- return $r * Math::BigRat->new(2)**$exponent;
-}
-
-# NOTE: the read_* functions are hard to read because they're ports
-# of even harder to read python functions.
-# TODO: make them readable
-
-=method read_id
-
-Reads an EBML ID atom in hexadecimal string format, suitable
-for passing to L<Parse::Matroska::Definitions/elem_by_hexid($id)>.
-
-=cut
-sub read_id {
- my ($self) = @_;
- my $t = $self->_getc;
- return undef unless defined $t;
- my $i = 0;
- my $mask = 1<<7;
-
- if (ord($t) == 0) {
- croak "Matroska Syntax error: first byte of ID was \\0"
- }
- until (ord($t) & $mask) {
- ++$i;
- $mask >>= 1;
- }
- # return hex string of the bytes we just read
- return unpack "H*", ($t . $self->readlen($i));
-}
-
-=method read_size
-
-Reads an EBML Data Size atom, which immediately follows
-an EBML ID atom.
-
-This returns an array consisting of:
-
-=for :list
-0. The length of the Data Size atom.
-1. The value encoded in the Data Size atom, which is the length of all the data following it.
-
-=cut
-sub read_size {
- my ($self) = @_;
- my $t = $self->_getc;
- my $i = 0;
- my $mask = 1<<7;
-
- if (ord($t) == 0) {
- croak "Matroska Syntax error: first byte of data size was \\0"
- }
- until (ord($t) & $mask) {
- ++$i;
- $mask >>= 1;
- }
- $t = $t & chr($mask-1); # strip length bits (keep only significant bits)
- return ($i+1, _bin2int $t . $self->readlen($i));
-}
-
-=method read_str($length)
-
-Reads a string of length C<$length> bytes from the internal filehandle.
-The string is already L<Encode/decode>d from C<UTF-8>, which is the
-standard Matroska string encoding.
-
-=cut
-{
- my $utf8 = find_encoding("UTF-8");
- sub read_str {
- my ($self, $length) = @_;
- return $utf8->decode($self->readlen($length));
- }
-}
-
-=method read_uint($length)
-
-Reads an unsigned integer of length C<$length> bytes
-from the internal filehandle.
-
-Returns a L<Math::BigInt> object if C<$length> is greater
-than 4.
-
-=cut
-sub read_uint {
- my ($self, $length) = @_;
- return _bin2int $self->readlen($length);
-}
-
-=method read_sint($length)
-
-Reads a signed integer of length C<$length> bytes
-from the internal filehandle.
-
-Returns a L<Math::BigInt> object if C<$length> is greater
-than 4.
-
-=cut
-sub read_sint {
- my ($self, $length) = @_;
- my $i = $self->read_uint($length);
-
- # Apply 2's complement to the unsigned int
- my $mask = int(2 ** ($length * 8 - 1));
- # if the most significant bit is set...
- if ($i & $mask) {
- # subtract the MSB twice
- $i -= 2 * $mask;
- }
- return $i;
-}
-
-=method read_float($length)
-
-Reads an IEEE floating point number of length C<$length>
-bytes from the internal filehandle.
-
-Only lengths C<4> and C<8> are supported (C C<float> and C<double>).
-
-=cut
-{
- my $b1 = new Math::BigInt 1;
-
- sub read_float {
- my ($self, $length) = @_;
- my $i = new Math::BigInt $self->read_uint($length)->bstr;
- my $f;
-
- # These evil expressions reinterpret an unsigned int as IEEE binary floats
- if ($length == 4) {
- $f = _ldexp(($i & ((1<<23) - 1)) + (1<<23), ($i>>23 & ((1<<8) - 1)) - 150);
- $f = -$f if $i & ($b1<<31);
- } elsif ($length == 8) {
- $f = _ldexp(($i & (($b1<<52) - 1)) + ($b1<<52), ($i>>52 & ((1<<12) - 1)) - 1075);
- $f = -$f if $i & ($b1<<63);
- } else {
- croak "Matroska Syntax error: unsupported IEEE float byte size $length";
- }
-
- return $f;
- }
-}
-
-=method read_ebml_id($length)
-
-Reads an EBML ID when it's encoded as the data inside another
-EBML element, that is, when the enclosing element's C<type> is
-C<ebml_id>.
-
-This returns a hashref with the EBML element description as
-defined in L<Parse::Matroska::Definitions>.
-
-=cut
-sub read_ebml_id {
- my ($self, $length) = @_;
- return elem_by_hexid(unpack("H*", $self->readlen($length)));
-}
-
-=method skip($length)
-
-Skips C<$length> bytes in the internal filehandle.
-
-=cut
-sub skip {
- my ($self, $len) = @_;
- return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1);
- $self->readlen($len);
- return;
-}
-
-=method getpos
-
-Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle.
-
-Returns undef if the internal filehandle can't C<getpos>.
-
-=cut
-sub getpos {
- my ($self) = @_;
- return undef unless $self->{fh}->can('getpos');
- return $self->{fh}->getpos;
-}
-
-=method setpos($pos)
-
-Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle.
-
-Returns C<undef> if the internal filehandle can't C<setpos>.
-
-Croaks if C<setpos> does not seek to the requested position,
-that is, if calling C<getpos> does not yield the same object
-as the C<$pos> argument.
-
-=cut
-sub setpos {
- my ($self, $pos) = @_;
- return undef unless $pos && $self->{fh}->can('setpos');
-
- my $ret = $self->{fh}->setpos($pos);
- croak "Cannot seek to correct position"
- unless $self->getpos eq $pos;
- return $ret;
-}
-
-=method read_element($read_bin)
-
-Reads a full EBML element from the internal filehandle.
-
-Returns a L<Parse::Matroska::Element> object initialized with
-the read data. If C<read_bin> is not present or is false, will
-delay-load the contents of C<binary> type elements, that is,
-they will only be loaded when calling C<get_value> on the
-returned L<Parse::Matroska::Element> object.
-
-Does not read the children of the element if its type is
-C<sub>. Look into the L<Parse::Matroska::Element> interface
-for details in how to read children elements.
-
-Pass a true C<$read_bin> if the stream being read is not
-seekable (C<getpos> is undef) and the contents of C<binary>
-elements is desired, otherwise seeking errors or internal
-filehandle corruption might occur.
-
-=cut
-sub read_element {
- my ($self, $read_bin) = @_;
- return undef if $self->{fh}->eof;
-
- my $elem_pos = $self->getpos;
-
- my $elid = $self->read_id;
- my $elem_def = elem_by_hexid($elid);
- my ($size_len, $content_len) = $self->read_size;
- my $full_len = length($elid)/2 + $size_len + $content_len;
-
- my $elem = Parse::Matroska::Element->new(
- elid => $elid,
- name => $elem_def && $elem_def->{name},
- type => $elem_def && $elem_def->{valtype},
- size_len => $size_len,
- content_len => $content_len,
- full_len => $full_len,
- reader => $self,
- elem_pos => $elem_pos,
- data_pos => $self->getpos,
- );
- weaken($elem->{reader});
-
- if (defined $elem_def) {
- if ($elem->{type} eq 'sub') {
- $elem->{value} = [];
- } elsif ($elem->{type} eq 'str') {
- $elem->{value} = $self->read_str($content_len);
- } elsif ($elem->{type} eq 'ebml_id') {
- $elem->{value} = $self->read_ebml_id($content_len);
- } elsif ($elem->{type} eq 'uint') {
- $elem->{value} = $self->read_uint($content_len);
- } elsif ($elem->{type} eq 'sint') {
- $elem->{value} = $self->read_sint($content_len);
- } elsif ($elem->{type} eq 'float') {
- $elem->{value} = $self->read_float($content_len);
- } elsif ($elem->{type} eq 'skip') {
- $self->skip($content_len);
- } elsif ($elem->{type} eq 'binary') {
- if ($read_bin) {
- $elem->{value} = $self->readlen($content_len);
- } else {
- $self->skip($content_len);
- }
- } else {
- die "Matroska Definition error: type $elem->{valtype} unknown"
- }
- } else {
- $self->skip($content_len);
- }
- return $elem;
-}
-
-1;
-
-=head1 CAVEATS
-
-Children elements have to be processed as soon as an element
-with children is found, or their children ignored with
-L<Parse::Matroska::Element/skip>. Not doing so doesn't cause
-errors but results in an invalid structure, with constant C<0>
-depth.
-
-To work correctly in unseekable streams, either the contents
-of C<binary>-type elements has to be ignored or the C<read_bin>
-flag to C<read_element> has to be true.