use 5.008; use strict; use warnings; # ABSTRACT: a mid-level representation of an EBML element package Parse::Matroska::Element; use Carp; use List::Util qw{first}; =head1 SYNOPSIS use Parse::Matroska::Reader; my $reader = Parse::Matroska::Reader->new($path); my $elem = $reader->read_element; print "ID: $elem->{elid}\n"; print "Name: $elem->{name}\n"; print "Length: $elem->{content_len}\n"; print "Type: $elem->{type}\n"; print "Child count: ", scalar(@{$elem->all_children}), "\n"; if ($elem->{type} eq 'sub') { while (my $chld = $elem->next_child) { print "Child Name: $chld->{name}\n"; } } else { print "Value: ", $elem->get_value, "\n"; } =head1 DESCRIPTION Represents a single Matroska element as decoded by L. This is essentially a hash augmented with functions for delay-loading of binary values and children elements. =head1 NOTE The API of this module is not yet considered stable. =attr elid The EBML Element ID, suitable for passing to L. =attr name The EBML Element's name. =attr type The EBML Element's type. Can be C, C, C, C, C or C. See L for details. Equivalent to C{value})-E{valtype}>. =attr value The EBML Element's value. Should be obtained through L. Is an unicode string if the L is C, that is, the string has already been decoded by L. Is C if the L is C and the contents were delay-loaded and not yet read. L will do the delayed load if needed. Is an arrayref if the L is C, containing the children nodes that were already loaded. Is a hashref if the L is C, containing the referred element's information as defined in L. Calling C{value}-E{elid})> will return the same object as $elem->{value}. =attr full_len The entire length of this EBML Element, including the header's. =attr size_len The length of the size marker. Used when calculating L from L =attr content_len The length of the contents of this EBML Element, which excludes the header. =attr reader A weakened reference to the associated L. 
=method new(%hash)

Creates a new Element initialized with the hash
given as argument.

=cut

sub new {
    my ($class, @fields) = @_;

    my $self = bless {}, $class;
    $self->initialize(@fields);
    return $self;
}

=method initialize(%hash)

Called by L</new> on initialization. Copies every key/value pair
of C<%hash> onto the object and defaults C<depth> to 0.

=cut

sub initialize {
    my ($self, %args) = @_;

    # Copy all supplied fields in a single hash-slice assignment; keys() and
    # values() walk an unmodified hash in the same order.
    @{$self}{ keys %args } = values %args;

    # An element is top-level (depth 0) unless the caller said otherwise.
    $self->{depth} ||= 0;
    return;
}

=method skip

Called by the user to ignore the contents of this EBML node.
Needed when ignoring the children of a node.

Does nothing when the element has no associated reader. Croaks if
the reader is no longer positioned at the start of this element's data.

=cut

sub skip {
    my ($self) = @_;

    # Without a reader there is no stream to advance, so nothing to skip.
    my $reader = $self->{reader} or return;

    # Skipping is only meaningful while the reader still sits at the start of
    # our content; once anything was read the position no longer matches.
    my $pos = $reader->getpos;
    croak "Too late to skip, reads were already done"
        if $pos ne $self->{data_pos};

    return $reader->skip($self->{content_len});
}

=method get_value($keep_bin)

Returns the value contained by this EBML element.

If the element has children, returns an arrayref to the
children elements that were already encountered.

If the element's type is C<binary> and the value was
delay-loaded, does the reading now.

If $keep_bin is true, the delay-loaded data is kept
as the L</value>, otherwise, further calls to
C<get_value> will reread the data from the L</reader>.

Returns C<undef> for elements of type C<skip>, and for non-C<binary>
elements that have no value yet. Croaks if a value has to be loaded
and the reader is gone or its position is unknown.

=cut

sub get_value {
    my ($self, $keep_bin) = @_;

    # This accessor always yields exactly one scalar, hence the explicit undef.
    return undef if $self->{type} eq 'skip';

    # Test definedness rather than truth: 0, "0" and "" are perfectly valid
    # decoded values and must not be mistaken for "not loaded yet".
    return $self->{value} if defined $self->{value};

    my $reader = $self->{reader}
        or croak "The associated Reader has been deleted";

    # delay-loaded 'binary'
    if ($self->{type} eq 'binary') {
        croak "Cannot seek in the current Reader" unless $self->{data_pos};

        # seek to the data position and read the payload...
        $reader->setpos($self->{data_pos});
        my $data = $reader->readlen($self->{content_len});

        # ...caching it only when the caller asked for that
        $self->{value} = $data if $keep_bin;
        return $data;
    }

    # Nothing stored and nothing to delay-load.
    return undef;
}

=method next_child($read_bin)

Builtin iterator; reads and returns the next child element.
Always returns undef if the type isn't C<sub>.
Returns undef at the end of the iterator and resets itself to
point to the first element; so calling L</next_child> after the
iterator returned C<undef> will return the first child.

The optional C<$read_bin> parameter has the children elements
not delay-load their value if their type is C<binary>.

If all children elements have already been read, return
each element in-order as would be given by
L</all_children>.

Croaks if the associated reader has been deleted, or if a child
claims more bytes than remain in this element; in the latter case
the offending child is not recorded and the iterator state is left
as it was before the call.

=cut

sub next_child {
    my ($self, $read_bin) = @_;
    return unless $self->{type} eq 'sub';

    if ($self->{_all_children_read}) {
        # Replay mode: walk the cached children. An element that never had
        # any child has no array at all, so fall back to an empty list
        # instead of dereferencing undef.
        my $children = $self->{value} || [];
        my $idx      = $self->{_last_child} || 0;

        if ($idx >= @$children) {
            # reset the iterator, returning undef once
            $self->{_last_child} = 0;
            return;
        }

        $self->{_last_child} = $idx + 1;
        return $children->[$idx];
    }

    # Bytes of our content not yet claimed by a child (no // under 5.008).
    my $len = defined $self->{remaining_len}
        ? $self->{remaining_len}
        : $self->{content_len};

    if ($len == 0) {
        # we've read all children; switch into $self->{value} iteration mode
        $self->{_all_children_read} = 1;
        # return undef since the iterator will reset
        return;
    }

    $self->{pos_offset} ||= 0;
    my $pos    = $self->{data_pos};
    my $reader = $self->{reader}
        or croak "The associated reader has been deleted";

    # Go to the start of our content, then step over the children already
    # consumed (whence 1 = relative to the current position). The relative
    # seek is only attempted when a data position is known.
    $reader->setpos($pos);
    $reader->{fh}->seek($self->{pos_offset}, 1) if $pos;

    my $chld = $reader->read_element($read_bin);
    return unless defined $chld;

    # Validate before touching any state, so a malformed child does not
    # leave the iterator with a negative remaining length.
    my $remaining = $len - $chld->{full_len};
    if ($remaining < 0) {
        croak sprintf
            "Child elements consumed %s more bytes than parent %s contained",
            -$remaining, $self->{name};
    }

    $self->{pos_offset}   += $chld->{full_len};
    $self->{remaining_len} = $remaining;

    $chld->{depth} = $self->{depth} + 1;
    push @{ $self->{value} ||= [] }, $chld;

    return $chld;
}

=method all_children($recurse,$read_bin)

Calls L</populate_children> on self and returns an arrayref with
the children nodes.

Both C<$recurse> and C<$read_bin> are optional and default to false.
=cut

sub all_children {
    my ($self, $recurse, $read_bin) = @_;

    $self->populate_children($recurse, $read_bin);
    return $self->{value};
}

=method children_by_name($name)

Searches in the already read children elements for all
elements with the EBML name C<$name>. Returns an array
containing all found elements. On scalar context,
returns only the first element found.

Does nothing and returns nothing in void context.

Croaks if the element's L</type> isn't C<sub>.

=cut

sub children_by_name {
    my ($self, $name) = @_;

    return unless defined wantarray; # don't do work if work isn't wanted
    croak "Element can't have children" unless $self->{type} eq 'sub';

    # Only children that were already read are searched; an element that has
    # none yet simply has nothing to match.
    my $children = $self->{value} || [];

    return grep { $_->{name} eq $name } @$children if wantarray; # list

    # scalar: stop at the first hit instead of collecting every match
    return first { $_->{name} eq $name } @$children;
}

=method populate_children($recurse,$read_bin)

Populates the internal array of children elements, that is,
requests that the associated L</reader> reads
all children elements. Returns itself.

Returns false if the element's L</type> isn't C<sub>.

If C<$recurse> is provided and is true, the method will call
itself in the children elements with the same parameters it
received; this will build a full EBML tree. Children that were
already read before the call are recursed into as well, and any
children not read yet are then loaded.

If C<$read_bin> is provided and is true, disables delay-loading
of the contents of C<binary>-type nodes, reading the contents
to memory.

If both C<$recurse> and C<$read_bin> are true, entire EBML trees
can be loaded without requiring seeks, thus behaving correctly on
unseekable streams. If C<$read_bin> is false, the entire EBML
tree is still loaded, but calling L</get_value> on C<binary>-type
nodes will produce an error on unseekable streams.

=cut

sub populate_children {
    my ($self, $recurse, $read_bin) = @_;
    return unless $self->{type} eq 'sub';

    # Children that are already cached (value may not exist yet at all).
    my @known = @{ $self->{value} || [] };

    if ($recurse && @known) {
        for my $child (@known) {
            $child->populate_children($recurse, $read_bin);
        }

        # Only stop here when the cache is known to be complete; after a
        # partial next_child() iteration the remaining children still have
        # to be read below.
        return $self if $self->{_all_children_read};
    }

    # Drain the iterator: reads whatever has not been read yet (or walks the
    # cache once when everything was read and no recursion was requested).
    while (my $chld = $self->next_child($read_bin)) {
        $chld->populate_children($recurse, $read_bin) if $recurse;
    }

    return $self;
}

1;