diff options
-rwxr-xr-x | TOOLS/file2string.pl | 24 | ||||
-rwxr-xr-x | TOOLS/file2string.py | 27 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska.pm | 30 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Definitions.pm | 384 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Element.pm | 331 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Reader.pm | 426 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Utils.pm | 37 | ||||
-rwxr-xr-x | TOOLS/matroska.pl | 169 | ||||
-rwxr-xr-x | TOOLS/matroska.py | 463 | ||||
-rw-r--r-- | waftools/generators/sources.py | 4 | ||||
-rw-r--r-- | wscript | 4 |
11 files changed, 495 insertions, 1404 deletions
diff --git a/TOOLS/file2string.pl b/TOOLS/file2string.pl deleted file mode 100755 index 341bb06fd6..0000000000 --- a/TOOLS/file2string.pl +++ /dev/null @@ -1,24 +0,0 @@ -#! /usr/bin/env perl - -use strict; -use warnings; - -# Convert the contents of a file into a C string constant. -# Note that the compiler will implicitly add an extra 0 byte at the end -# of every string, so code using the string may need to remove that to get -# the exact contents of the original file. -# FIXME: why not a char array? - -# treat only alphanumeric and punctuations (excluding " and ?) as safe -my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>^_{|}~ -]}; - -for my $file (@ARGV) { - open my $fh, '<:raw', $file or next; - print "/* Generated from $file */\n"; - while (<$fh>) { - # replace unsafe chars with their equivalent octal escapes - s/($unsafe_chars)/\\@{[sprintf '%03o', ord($1)]}/gos; - print "\"$_\"\n" - } - close $fh; -} diff --git a/TOOLS/file2string.py b/TOOLS/file2string.py new file mode 100755 index 0000000000..6cdd1a72ae --- /dev/null +++ b/TOOLS/file2string.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +# Convert the contents of a file into a C string constant. +# Note that the compiler will implicitly add an extra 0 byte at the end +# of every string, so code using the string may need to remove that to get +# the exact contents of the original file. + +import sys + +# Indexing a byte string yields int on Python 3.x, and a str on Python 2.x +def pord(c): + return ord(c) if type(c) == str else c + +def main(infile): + conv = ['\\' + ("%03o" % c) for c in range(256)] + safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \ + "0123456789!#%&'()*+,-./:;<=>?[]^_{|}~ " + for c in safe_chars: + conv[ord(c)] = c + for c, esc in ("\nn", "\tt", r"\\", '""'): + conv[ord(c)] = '\\' + esc + for line in infile: + sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n') + +with open(sys.argv[1], 'rb') as infile: + sys.stdout.write("// Generated from %s\n\n" % sys.argv[1]) + main(infile) diff --git a/TOOLS/lib/Parse/Matroska.pm b/TOOLS/lib/Parse/Matroska.pm deleted file mode 100644 index e1c08c9814..0000000000 --- a/TOOLS/lib/Parse/Matroska.pm +++ /dev/null @@ -1,30 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: Module collection to parse Matroska files. -package Parse::Matroska; - -=head1 DESCRIPTION - -C<use>s L<Parse::Matroska::Reader>. See the documentation -of the modules mentioned in L</"SEE ALSO"> for more information -in how to use this module. - -It's intended for this module to contain high-level interfaces -to the other modules in the distribution. - -=head1 SOURCE CODE - -L<https://github.com/Kovensky/Parse-Matroska> - -=head1 SEE ALSO - -L<Parse::Matroska::Reader>, L<Parse::Matroska::Element>, -L<Parse::Matroska::Definitions>. - -=cut - -use Parse::Matroska::Reader; - -1; diff --git a/TOOLS/lib/Parse/Matroska/Definitions.pm b/TOOLS/lib/Parse/Matroska/Definitions.pm deleted file mode 100644 index 5a5adcd6de..0000000000 --- a/TOOLS/lib/Parse/Matroska/Definitions.pm +++ /dev/null @@ -1,384 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: internal EBML grammar definitions -package Parse::Matroska::Definitions; - -use Parse::Matroska::Utils qw{uniq uncamelize}; - -use Exporter; -our @ISA = qw{Exporter}; -our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION}; - -=head1 SYNOPSIS - - use Parse::Matroska::Definitions qw{elem_by_hexid}; - my $ebml_id = elem_by_hexid('1a45dfa3'); - print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}"; - -=head1 DESCRIPTION - -Contains the definition of the EBML grammar as expected in -Matroska files. This module is meant mostly for internal use. - -As this was extended from a script in mpv-player, some data -generated is apparently useless for regular module users -but is still relevant to the mpv-player script. Such data -is annotated as being for mpv compatibility. - -=head1 NOTE - -The API of this module is not yet considered stable. - -=head1 GLOBALS - -These global variables are considered B<immutable>. - -=head2 @Parse::Matroska::Definitions::global_elem_list - -A global list of known matroska elements. Useful for -mpv's matroska script, used for generating C headers -that parse matroska. - -=head2 %Parse::Matroska::Definitions::global_elem_dict - -A global hash of known matroska elements. Used internally -by L</elem_by_hexid($id)>. - -=cut - -@Parse::Matroska::Definitions::global_elem_list = (); -%Parse::Matroska::Definitions::global_elem_dict = (); - -=head2 %EBML_DEFINITION - -Optionally-importable hash of known EBML IDs belonging -to the EBML generic grammar. - -=head2 %MATROSKA_DEFINITION - -Optionally-importable hash of known EBML IDs belonging -to the Matroska-specific grammar. - -=cut - -our %EBML_DEFINITION = define_ebml(); -our %MATROSKA_DEFINITION = define_matroska(); - -=method elem_by_hexid($id) - -Returns an EBML Element Definition corresponding to the provided -hexadecimal string. Returns C<undef> if the element is unknown. - -=cut -sub elem_by_hexid { - my ($elid) = @_; - return $Parse::Matroska::Definitions::global_elem_dict{$elid}; -} - -################################################ -### Helper functions for document definition ### -################################################ - -# used by elem when setting the 'valname' key -use constant TYPE_MAP => { - uint => 'uint64_t', - str => 'char *', - binary => 'struct bstr', - ebml_id => 'uint32_t', - float => 'double', - sint => 'int64_t', -}; - -# this will be localized to "MATROSKA" or "EBML" on the elem declarations -our $ELEM_DEFINE_TYPE = undef; - -=method elem($name,$elid,$valtype) - -NOTE: never call this function yourself; it changes data structures -that are considered immutable outside of this package. - -Internal API function that generates the EBML Element Definitions. - -This API function returns an array which first element is C<$elid> -and the second is a generated hash. The generated hash is stored -in the @global_elem_list and %global_elem_dict. - -The generated hash contains: - -=for :list -= name -The EBML Element's name, given through C<$name>. -= elid -The EBML Element's hex id, given through C<$elid>. Used for lookups by L</elem_by_hexid($id)>. -= valtype -The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref. -= multiple -If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L</name>. Used to -mark elements that may be repeated. -= subelements -An arrayref of elements that may be children of this element, given through C<$valtype> if it -is an arrayref. Sets L</valtype> to C<sub> if there are subelements. -= subids -An arrayref listing all the L</elid>s of subelements, C<uniq>ified. - -The following elements are for mpv compatibility: - -=for :list -= definename -Name used for generating C #defines. -= fieldname -Name used for generating C struct fields. -= structname -Name used for generating C struct names. -= ebmltype -A pre-#defined constant to describe the element's type. -= valname -Typename used when declaring a struct field referring to this element. - -=cut -sub elem { - my %e = (name => shift, elid => shift, valtype => shift); - - # strip * from name, set 'multiple' if there was one - $e{multiple} = scalar $e{name} =~ s/\*$//; - - # ELEM_DEFINE_TYPE is either MATROSKA or EBML - $e{definename} = "${ELEM_DEFINE_TYPE}_ID_".uc($e{name}); - $e{fieldname} = uncamelize $e{name}; - $e{structname} = "ebml_$e{fieldname}"; - - if (ref $e{valtype} eq 'HASH') { - $e{subelements} = $e{valtype}; - $e{subids} = uniq map { $_->{elid} } values %{$e{subelements}}; - $e{valtype} = 'sub'; - $e{ebmltype} = 'EBML_TYPE_SUBELEMENTS'; - $e{valname} = "struct $e{structname}"; - } else { - $e{ebmltype} = "EBML_TYPE_\U$e{valtype}"; - die "Unrecognized value type $e{valtype}" unless - defined ($e{valname} = TYPE_MAP->{$e{valtype}}); - } - my $e = \%e; - push @Parse::Matroska::Definitions::global_elem_list, $e; - $Parse::Matroska::Definitions::global_elem_dict{$e{elid}} = $e; - return ($e{elid}, $e); -} - -############################################# -### EBML and Matroska document definitons ### -############################################# - -=method define_ebml - -Internal function that defines the EBML generic grammar. - -Must not be called from outside the package. - -=cut -sub define_ebml { - local $ELEM_DEFINE_TYPE = 'EBML'; - return ( - elem('EBML', '1a45dfa3', { - elem('EBMLVersion', '4286', 'uint'), - elem('EBMLReadVersion', '42f7', 'uint'), - elem('EBMLMaxIDLength', '42f2', 'uint'), - elem('EBMLMaxSizeLength', '42f3', 'uint'), - elem('DocType', '4282', 'str'), - elem('DocTypeVersion', '4287', 'uint'), - elem('DocTypeReadVersion', '4285', 'uint'), - }), - - elem('CRC32', 'bf', 'binary'), - elem('Void', 'ec', 'binary'), - ); -} - - -=method define_matroska - -Internal function that defines the Matroska-specific EBML grammar. - -Must not be called from outside the package. - -=cut -sub define_matroska { - local $ELEM_DEFINE_TYPE = 'MATROSKA'; - return ( - elem('Segment', '18538067', { - elem('SeekHead*', '114d9b74', { - elem('Seek*', '4dbb', { - elem('SeekID', '53ab', 'ebml_id'), - elem('SeekPosition', '53ac', 'uint'), - }), - }), - - elem('Info*', '1549a966', { - elem('SegmentUID', '73a4', 'binary'), - elem('PrevUID', '3cb923', 'binary'), - elem('NextUID', '3eb923', 'binary'), - elem('TimecodeScale', '2ad7b1', 'uint'), - elem('DateUTC', '4461', 'sint'), - elem('Title', '7ba9', 'str'), - elem('MuxingApp', '4d80', 'str'), - elem('WritingApp', '5741', 'str'), - elem('Duration', '4489', 'float'), - }), - - elem('Cluster*', '1f43b675', { - elem('Timecode', 'e7', 'uint'), - elem('BlockGroup*', 'a0', { - elem('Block', 'a1', 'binary'), - elem('BlockDuration', '9b', 'uint'), - elem('ReferenceBlock*', 'fb', 'sint'), - elem('DiscardPadding', '75A2', 'sint'), - }), - elem('SimpleBlock*', 'a3', 'binary'), - }), - - elem('Tracks*', '1654ae6b', { - elem('TrackEntry*', 'ae', { - elem('TrackNumber', 'd7', 'uint'), - elem('TrackUID', '73c5', 'uint'), - elem('TrackType', '83', 'uint'), - elem('FlagEnabled', 'b9', 'uint'), - elem('FlagDefault', '88', 'uint'), - elem('FlagForced', '55aa', 'uint'), - elem('FlagLacing', '9c', 'uint'), - elem('MinCache', '6de7', 'uint'), - elem('MaxCache', '6df8', 'uint'), - elem('DefaultDuration', '23e383', 'uint'), - elem('TrackTimecodeScale', '23314f', 'float'), - elem('MaxBlockAdditionID', '55ee', 'uint'), - elem('Name', '536e', 'str'), - elem('Language', '22b59c', 'str'), - elem('CodecID', '86', 'str'), - elem('CodecPrivate', '63a2', 'binary'), - elem('CodecName', '258688', 'str'), - elem('CodecDecodeAll', 'aa', 'uint'), - elem('CodecDelay', '56AA', 'uint'), - elem('SeekPreRoll', '56BB', 'uint'), - elem('Video', 'e0', { - elem('FlagInterlaced', '9a', 'uint'), - elem('PixelWidth', 'b0', 'uint'), - elem('PixelHeight', 'ba', 'uint'), - elem('DisplayWidth', '54b0', 'uint'), - elem('DisplayHeight', '54ba', 'uint'), - elem('DisplayUnit', '54b2', 'uint'), - elem('FrameRate', '2383e3', 'float'), - elem('ColourSpace', '2eb524', 'binary'), - elem('StereoMode', '53b8', 'uint'), - elem('Colour', '55B0', { - elem('MatrixCoefficients', '55B1', 'uint'), - elem('BitsPerChannel', '55B2', 'uint'), - elem('ChromaSubsamplingHorz', '55B3', 'uint'), - elem('ChromaSubsamplingVert', '55B4', 'uint'), - elem('CbSubsamplingHorz', '55B5', 'uint'), - elem('CbSubsamplingVert', '55B6', 'uint'), - elem('ChromaSitingHorz', '55B7', 'uint'), - elem('ChromaSitingVert', '55B8', 'uint'), - elem('Range', '55B9', 'uint'), - elem('TransferCharacteristics', '55BA', 'uint'), - elem('Primaries', '55BB', 'uint'), - elem('MaxCLL', '55BC', 'uint'), - elem('MaxFALL', '55BD', 'uint'), - elem('MasteringMetadata', '55D0', { - elem('PrimaryRChromaticityX', '55D1', 'float'), - elem('PrimaryRChromaticityY', '55D2', 'float'), - elem('PrimaryGChromaticityX', '55D3', 'float'), - elem('PrimaryGChromaticityY', '55D4', 'float'), - elem('PrimaryBChromaticityX', '55D5', 'float'), - elem('PrimaryBChromaticityY', '55D6', 'float'), - elem('WhitePointChromaticityX', '55D7', 'float'), - elem('WhitePointChromaticityY', '55D8', 'float'), - elem('LuminanceMax', '55D9', 'float'), - elem('LuminanceMin', '55DA', 'float'), - }), - }), - }), - elem('Audio', 'e1', { - elem('SamplingFrequency', 'b5', 'float'), - elem('OutputSamplingFrequency', '78b5', 'float'), - elem('Channels', '9f', 'uint'), - elem('BitDepth', '6264', 'uint'), - }), - elem('ContentEncodings', '6d80', { - elem('ContentEncoding*', '6240', { - elem('ContentEncodingOrder', '5031', 'uint'), - elem('ContentEncodingScope', '5032', 'uint'), - elem('ContentEncodingType', '5033', 'uint'), - elem('ContentCompression', '5034', { - elem('ContentCompAlgo', '4254', 'uint'), - elem('ContentCompSettings', '4255', 'binary'), - }), - }), - }), - }), - }), - - elem('Cues', '1c53bb6b', { - elem('CuePoint*', 'bb', { - elem('CueTime', 'b3', 'uint'), - elem('CueTrackPositions*', 'b7', { - elem('CueTrack', 'f7', 'uint'), - elem('CueClusterPosition', 'f1', 'uint'), - elem('CueRelativePosition','f0', 'uint'), - elem('CueDuration', 'b2', 'uint'), - }), - }), - }), - - elem('Attachments', '1941a469', { - elem('AttachedFile*', '61a7', { - elem('FileDescription', '467e', 'str'), - elem('FileName', '466e', 'str'), - elem('FileMimeType', '4660', 'str'), - elem('FileData', '465c', 'binary'), - elem('FileUID', '46ae', 'uint'), - }), - }), - - elem('Chapters', '1043a770', { - elem('EditionEntry*', '45b9', { - elem('EditionUID', '45bc', 'uint'), - elem('EditionFlagHidden', '45bd', 'uint'), - elem('EditionFlagDefault', '45db', 'uint'), - elem('EditionFlagOrdered', '45dd', 'uint'), - elem('ChapterAtom*', 'b6', { - elem('ChapterUID', '73c4', 'uint'), - elem('ChapterTimeStart', '91', 'uint'), - elem('ChapterTimeEnd', '92', 'uint'), - elem('ChapterFlagHidden', '98', 'uint'), - elem('ChapterFlagEnabled', '4598', 'uint'), - elem('ChapterSegmentUID', '6e67', 'binary'), - elem('ChapterSegmentEditionUID', '6ebc', 'uint'), - elem('ChapterDisplay*', '80', { - elem('ChapString', '85', 'str'), - elem('ChapLanguage*', '437c', 'str'), - elem('ChapCountry*', '437e', 'str'), - }), - }), - }), - }), - elem('Tags*', '1254c367', { - elem('Tag*', '7373', { - elem('Targets', '63c0', { - elem('TargetTypeValue', '68ca', 'uint'), - elem('TargetTrackUID', '63c5', 'uint'), - elem('TargetEditionUID', '63c9', 'uint'), - elem('TargetChapterUID', '63c4', 'uint'), - elem('TargetAttachmentUID', '63c6', 'uint'), - }), - elem('SimpleTag*', '67c8', { - elem('TagName', '45a3', 'str'), - elem('TagLanguage', '447a', 'str'), - elem('TagString', '4487', 'str'), - }), - }), - }), - }), - ); -} - -1; diff --git a/TOOLS/lib/Parse/Matroska/Element.pm b/TOOLS/lib/Parse/Matroska/Element.pm deleted file mode 100644 index fa0830c11e..0000000000 --- a/TOOLS/lib/Parse/Matroska/Element.pm +++ /dev/null @@ -1,331 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: a mid-level representation of an EBML element -package Parse::Matroska::Element; - -use Carp; -use List::Util qw{first}; - -=head1 SYNOPSIS - - use Parse::Matroska::Reader; - my $reader = Parse::Matroska::Reader->new($path); - my $elem = $reader->read_element; - - print "ID: $elem->{elid}\n"; - print "Name: $elem->{name}\n"; - print "Length: $elem->{content_len}\n"; - print "Type: $elem->{type}\n"; - print "Child count: ", scalar(@{$elem->all_children}), "\n"; - if ($elem->{type} eq 'sub') { - while (my $chld = $elem->next_child) { - print "Child Name: $chld->{name}\n"; - } - } else { - print "Value: ", $elem->get_value, "\n"; - } - -=head1 DESCRIPTION - -Represents a single Matroska element as decoded by -L<Parse::Matroska::Reader>. This is essentially a hash -augmented with functions for delay-loading of binary -values and children elements. - -=head1 NOTE - -The API of this module is not yet considered stable. - -=attr elid - -The EBML Element ID, suitable for passing to -L<Parse::Matroska::Definitions/elem_by_hexid>. - -=attr name - -The EBML Element's name. - -=attr type - -The EBML Element's type. Can be C<uint>, C<sint>, -C<float>, C<ebml_id>, C<str> or C<binary>. See L</value> -for details. - -Equivalent to -C<elem_by_hexid($elem-E<gt>{value})-E<gt>{valtype}>. - -=attr value - -The EBML Element's value. Should be obtained through -L</get_value>. - -Is an unicode string if the L</type> is C<str>, that is, -the string has already been decoded by L<Encode/decode>. - -Is C<undef> if the L</type> is C<binary> and the contents -were delay-loaded and not yet read. L</get_value> will -do the delayed load if needed. - -Is an arrayref if the L</type> is C<sub>, containing -the children nodes that were already loaded. - -Is a hashref if the L</type> is C<ebml_id>, containing -the referred element's information as defined in -L<Parse::Matroska::Definitions>. Calling -C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will -return the same object as $elem->{value}. - -=attr full_len - -The entire length of this EBML Element, including -the header's. - -=attr size_len - -The length of the size marker. Used when calculating -L</full_len> from L</content_len> - -=attr content_len - -The length of the contents of this EBML Element, -which excludes the header. - -=attr reader - -A weakened reference to the associated -L<Parse::Matroska::Reader>. - -=method new(%hash) - -Creates a new Element initialized with the hash -given as argument. - -=cut -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - - $self->initialize(@_); - return $self; -} - -=method initialize(%hash) - -Called by L</new> on initialization. - -=cut -sub initialize { - my ($self, %args) = @_; - for (keys %args) { - $self->{$_} = $args{$_}; - } - $self->{depth} = 0 unless $self->{depth}; -} - -=method skip - -Called by the user to ignore the contents of this EBML node. -Needed when ignoring the children of a node. - -=cut -sub skip { - my ($self) = @_; - my $reader = $self->{reader}; - return unless $reader; # we don't have to skip if there's no reader - my $pos = $reader->getpos; - croak "Too late to skip, reads were already done" - if $pos ne $self->{data_pos}; - $reader->skip($self->{content_len}); -} - -=method get_value($keep_bin) - -Returns the value contained by this EBML element. - -If the element has children, returns an arrayref to -the children elements that were already encountered. - -If the element's type is C<binary> and the value was -delay-loaded, does the reading now. - -If $keep_bin is true, the delay-loaded data is kept -as the L</value>, otherwise, further calls to -C<get_value> will reread the data from the L</reader>. - -=cut -sub get_value { - my ($self, $keep_bin) = @_; - - return undef if $self->{type} eq 'skip'; - return $self->{value} if $self->{value}; - - my $reader = $self->{reader} or - croak "The associated Reader has been deleted"; - - # delay-loaded 'binary' - if ($self->{type} eq 'binary') { - croak "Cannot seek in the current Reader" unless $self->{data_pos}; - # seek to the data position... - $reader->setpos($self->{data_pos}); - # read the data, keeping it in value if requested - if ($keep_bin) { - $self->{value} = $reader->readlen($self->{content_len}); - return $self->{value}; - } else { - return $reader->readlen($self->{content_len}); - } - } -} - -=method next_child($read_bin) - -Builtin iterator; reads and returns the next child element. -Always returns undef if the type isn't C<sub>. - -Returns undef at the end of the iterator and resets itself to -point to the first element; so calling L</next_child($read_bin)> -after the iterator returned C<undef> will return the first child. - -The optional C<$read_bin> parameter has the children elements -not delay-load their value if their type is C<binary>. - -If all children elements have already been read, return -each element in-order as would be given by -L</all_children($recurse,$read_bin)>. - -=cut -sub next_child { - my ($self, $read_bin) = @_; - return unless $self->{type} eq 'sub'; - - if ($self->{_all_children_read}) { - my $idx = $self->{_last_child} ||= 0; - if ($idx == @{$self->{value}}) { - # reset the iterator, returning undef once - $self->{_last_child} = 0; - return; - } - my $ret = $self->{value}->[$idx]; - - ++$idx; - $self->{_last_child} = $idx; - return $ret; - } - - my $len = defined $self->{remaining_len} - ? $self->{remaining_len} - : $self->{content_len}; - - if ($len == 0) { - # we've read all children; switch into $self->{value} iteration mode - $self->{_all_children_read} = 1; - # return undef since the iterator will reset - return; - } - - $self->{pos_offset} ||= 0; - my $pos = $self->{data_pos}; - my $reader = $self->{reader} or croak "The associated reader has been deleted"; - $reader->setpos($pos); - $reader->{fh}->seek($self->{pos_offset}, 1) if $pos; - - my $chld = $reader->read_element($read_bin); - return undef unless defined $chld; - $self->{pos_offset} += $chld->{full_len}; - - $self->{remaining_len} = $len - $chld->{full_len}; - - if ($self->{remaining_len} < 0) { - croak "Child elements consumed $self->{remaining_len} more bytes than parent $self->{name} contained"; - } - - $chld->{depth} = $self->{depth} + 1; - $self->{value} ||= []; - - push @{$self->{value}}, $chld; - - return $chld; -} - -=method all_children($recurse,$read_bin) - -Calls L</populate_children($recurse,$read_bin)> on self -and returns an arrayref with the children nodes. - -Both C<$recurse> and C<$read_bin> are optional and default -to false. - -=cut -sub all_children { - my ($self, $recurse, $read_bin) = @_; - $self->populate_children($recurse, $read_bin); - return $self->{value}; -} - -=method children_by_name($name) - -Searches in the already read children elements for all -elements with the EBML name C<$name>. Returns an array -containing all found elements. On scalar context, -returns only the first element found. - -Croaks if the element's C<type> isn't C<sub>. - -=cut -sub children_by_name { - my ($self, $name) = @_; - return unless defined wantarray; # don't do work if work isn't wanted - croak "Element can't have children" unless $self->{type} eq 'sub'; - - my @found = grep { $_->{name} eq $name } @{$self->{value}}; - return @found if wantarray; # list - return shift @found if defined wantarray; # scalar -} - -=method populate_children($recurse,$read_bin) - -Populates the internal array of children elements, that is, -requests that the associated L<Matroska::Parser::Reader> reads -all children elements. Returns itself. - -Returns false if the element's C<type> isn't C<sub>. - -If C<$recurse> is provided and is true, the method will call -itself in the children elements with the same parameters it -received; this will build a full EBML tree. - -If C<$read_bin> is provided and is true, disables delay-loading -of the contents of C<binary>-type nodes, reading the contents -to memory. - -If both C<$recurse> and C<$read_bin> are true, entire EBML trees -can be loaded without requiring seeks, thus behaving correctly -on unseekable streams. If C<$read_bin> is false, the entire EBML -tree is still loaded, but calling L</get_value> on C<binary>-type -nodes will produce an error on unseekable streams. - -=cut -sub populate_children { - my ($self, $recurse, $read_bin) = @_; - - return unless $self->{type} eq 'sub'; - - if (@{$self->{value}} && $recurse) { - # only recurse - foreach (@{$self->{value}}) { - $_->populate_children($recurse, $read_bin); - } - return $self; - } - - while (my $chld = $self->next_child($read_bin)) { - $chld->populate_children($recurse, $read_bin) if $recurse; - } - - return $self; -} - -1; diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm deleted file mode 100644 index 614b7b12c0..0000000000 --- a/TOOLS/lib/Parse/Matroska/Reader.pm +++ /dev/null @@ -1,426 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: a low-level reader for EBML files -package Parse::Matroska::Reader; - -use Parse::Matroska::Definitions qw{elem_by_hexid}; -use Parse::Matroska::Element; - -use Carp; -use Scalar::Util qw{openhandle weaken}; -use IO::Handle; -use IO::File; -use List::Util qw{first}; -use Encode; - -use constant BIGINT_TRY => 'Pari,GMP,FastCalc'; -use Math::BigInt try => BIGINT_TRY; -use Math::BigRat try => BIGINT_TRY; - -=head1 SYNOPSIS - - use Parse::Matroska::Reader; - my $reader = Parse::Matroska::Reader->new($path); - $reader->close; - $reader->open(\$string_with_matroska_data); - - my $elem = $reader->read_element; - print "Element ID: $elem->{elid}\n"; - print "Element name: $elem->{name}\n"; - if ($elem->{type} ne 'sub') { - print "Element value: $elem->get_value\n"; - } else { - while (my $child = $elem->next_child) { - print "Child element: $child->{name}\n"; - } - } - $reader->close; - -=head1 DESCRIPTION - -Reads EBML data, which is used in Matroska files. -This is a low-level reader which is meant to be used as a backend -for higher level readers. TODO: write the high level readers :) - -=head1 NOTE - -The API of this module is not yet considered stable. - -=method new - -Creates a new reader. -Calls L</open($arg)> with its arguments if provided. - -=cut -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - - $self->open(@_) if @_; - return $self; -} - -=method open($arg) - -Creates the internal filehandle. The argument can be: - -=for :list -* An open filehandle or L<IO::Handle> object. -The filehandle is not C<dup()>ed, so calling L</close> in this -object will close the given filehandle as well. -* A scalar containing a path to a file. -* On perl v5.14 or newer, a scalarref pointing to EBML data. -For similar functionality in older perls, give an L<IO::String> object -or the handle to an already C<open>ed scalarref. - -=cut -sub open { - my ($self, $arg) = @_; - $self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw") - or croak "Can't open $arg: $!"; -} - -=method close - -Closes the internal filehandle. - -=cut -sub close { - my ($self) = @_; - $self->{fh}->close; - delete $self->{fh}; -} - -# equivalent to $self->readlen(1), possibly faster -sub _getc { - my ($self) = @_; - my $c = $self->{fh}->getc; - croak "Can't do read of length 1: $!" if !defined $c && $!; - return $c; -} - -=method readlen($length) - -Reads C<$length> bytes from the internal filehandle. - -=cut -sub readlen { - my ($self, $len) = @_; - my $data; - my $readlen = $self->{fh}->read($data, $len); - croak "Can't do read of length $len: $!" - unless defined $readlen; - return $data; -} - < |