From fae73079310eef9dce9737f2e37ff4b80c8830ee Mon Sep 17 00:00:00 2001 From: Kovensky Date: Wed, 7 Nov 2012 11:49:44 -0300 Subject: Port several python scripts to Perl file2string.pl and vdpau_functions.pl are direct ports. matroska.py was reimplemented as the Parse::Matroska module in CPAN, and matroska.pl was made a client of Parse::Matroska. A copy of Parse::Matroska is included in TOOLS/lib, and matroska.pl looks there first when trying to load the module. osxbundle.py was not ported since I have no means to verify it. Python is always available on OSX though, so there is no harm in removing the check for it on configure. --- TOOLS/file2string.pl | 24 ++ TOOLS/file2string.py | 27 -- TOOLS/lib/Parse/Matroska.pm | 30 +++ TOOLS/lib/Parse/Matroska/Definitions.pm | 350 ++++++++++++++++++++++++++ TOOLS/lib/Parse/Matroska/Element.pm | 331 ++++++++++++++++++++++++ TOOLS/lib/Parse/Matroska/Reader.pm | 423 +++++++++++++++++++++++++++++++ TOOLS/lib/Parse/Matroska/Utils.pm | 37 +++ TOOLS/matroska.pl | 169 +++++++++++++ TOOLS/matroska.py | 429 -------------------------------- TOOLS/vdpau_functions.pl | 74 ++++++ TOOLS/vdpau_functions.py | 64 ----- 11 files changed, 1438 insertions(+), 520 deletions(-) create mode 100755 TOOLS/file2string.pl delete mode 100755 TOOLS/file2string.py create mode 100644 TOOLS/lib/Parse/Matroska.pm create mode 100644 TOOLS/lib/Parse/Matroska/Definitions.pm create mode 100644 TOOLS/lib/Parse/Matroska/Element.pm create mode 100644 TOOLS/lib/Parse/Matroska/Reader.pm create mode 100644 TOOLS/lib/Parse/Matroska/Utils.pm create mode 100755 TOOLS/matroska.pl delete mode 100755 TOOLS/matroska.py create mode 100755 TOOLS/vdpau_functions.pl delete mode 100755 TOOLS/vdpau_functions.py (limited to 'TOOLS') diff --git a/TOOLS/file2string.pl b/TOOLS/file2string.pl new file mode 100755 index 0000000000..d9ad215d6d --- /dev/null +++ b/TOOLS/file2string.pl @@ -0,0 +1,24 @@ +#! 
/usr/bin/env perl + +use strict; +use warnings; + +# Convert the contents of a file into a C string constant. +# Note that the compiler will implicitly add an extra 0 byte at the end +# of every string, so code using the string may need to remove that to get +# the exact contents of the original file. +# FIXME: why not a char array? + +# treat only alphanumeric and not-" punctuation as safe +my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>?^_{|}~ -]}; + +for my $file (@ARGV) { + open my $fh, '<:raw', $file or next; + print "/* Generated from $file */\n"; + while (<$fh>) { + # replace unsafe chars with their equivalent octal escapes + s/($unsafe_chars)/\\@{[sprintf '%03o', ord($1)]}/gos; + print "\"$_\"\n" + } + close $fh; +} diff --git a/TOOLS/file2string.py b/TOOLS/file2string.py deleted file mode 100755 index 6cdd1a72ae..0000000000 --- a/TOOLS/file2string.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python - -# Convert the contents of a file into a C string constant. -# Note that the compiler will implicitly add an extra 0 byte at the end -# of every string, so code using the string may need to remove that to get -# the exact contents of the original file. 
- -import sys - -# Indexing a byte string yields int on Python 3.x, and a str on Python 2.x -def pord(c): - return ord(c) if type(c) == str else c - -def main(infile): - conv = ['\\' + ("%03o" % c) for c in range(256)] - safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \ - "0123456789!#%&'()*+,-./:;<=>?[]^_{|}~ " - for c in safe_chars: - conv[ord(c)] = c - for c, esc in ("\nn", "\tt", r"\\", '""'): - conv[ord(c)] = '\\' + esc - for line in infile: - sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n') - -with open(sys.argv[1], 'rb') as infile: - sys.stdout.write("// Generated from %s\n\n" % sys.argv[1]) - main(infile) diff --git a/TOOLS/lib/Parse/Matroska.pm b/TOOLS/lib/Parse/Matroska.pm new file mode 100644 index 0000000000..e1c08c9814 --- /dev/null +++ b/TOOLS/lib/Parse/Matroska.pm @@ -0,0 +1,30 @@ +use 5.008; +use strict; +use warnings; + +# ABSTRACT: Module collection to parse Matroska files. +package Parse::Matroska; + +=head1 DESCRIPTION + +Cs L. See the documentation +of the modules mentioned in L for more information +in how to use this module. + +It's intended for this module to contain high-level interfaces +to the other modules in the distribution. + +=head1 SOURCE CODE + +L + +=head1 SEE ALSO + +L, L, +L. 
+ +=cut + +use Parse::Matroska::Reader; + +1; diff --git a/TOOLS/lib/Parse/Matroska/Definitions.pm b/TOOLS/lib/Parse/Matroska/Definitions.pm new file mode 100644 index 0000000000..9b700a7d20 --- /dev/null +++ b/TOOLS/lib/Parse/Matroska/Definitions.pm @@ -0,0 +1,350 @@ +use 5.008; +use strict; +use warnings; + +# ABSTRACT: internal EBML grammar definitions +package Parse::Matroska::Definitions; + +use Parse::Matroska::Utils qw{uniq uncamelize}; + +use Exporter; +our @ISA = qw{Exporter}; +our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION}; + +=head1 SYNOPSIS + + use Parse::Matroska::Definitions qw{elem_by_hexid}; + my $ebml_id = elem_by_hexid('1a45dfa3'); + print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}"; + +=head1 DESCRIPTION + +Contains the definition of the EBML grammar as expected in +Matroska files. This module is meant mostly for internal use. + +As this was extended from a script in mpv-player, some data +generated is apparently useless for regular module users +but is still relevant to the mpv-player script. Such data +is annotated as being for mpv compatibility. + +=head1 NOTE + +The API of this module is not yet considered stable. + +=head1 GLOBALS + +These global variables are considered B. + +=head2 @Parse::Matroska::Definitions::global_elem_list + +A global list of known matroska elements. Useful for +mpv's matroska script, used for generating C headers +that parse matroska. + +=head2 %Parse::Matroska::Definitions::global_elem_dict + +A global hash of known matroska elements. Used internally +by L. + +=cut + +@Parse::Matroska::Definitions::global_elem_list = (); +%Parse::Matroska::Definitions::global_elem_dict = (); + +=head2 %EBML_DEFINITION + +Optionally-importable hash of known EBML IDs belonging +to the EBML generic grammar. + +=head2 %MATROSKA_DEFINITION + +Optionally-importable hash of known EBML IDs belonging +to the Matroska-specific grammar. 
+ +=cut + +our %EBML_DEFINITION = define_ebml(); +our %MATROSKA_DEFINITION = define_matroska(); + +=method elem_by_hexid($id) + +Returns an EBML Element Definition corresponding to the provided +hexadecimal string. Returns C if the element is unknown. + +=cut +sub elem_by_hexid { + my ($elid) = @_; + return $Parse::Matroska::Definitions::global_elem_dict{$elid}; +} + +################################################ +### Helper functions for document definition ### +################################################ + +# used by elem when setting the 'valname' key +use constant TYPE_MAP => { + uint => 'uint64_t', + str => 'struct bstr', + binary => 'struct bstr', + ebml_id => 'uint32_t', + float => 'double', + sint => 'int64_t', +}; + +# this will be localized to "MATROSKA" or "EBML" on the elem declarations +our $ELEM_DEFINE_TYPE = undef; + +=method elem($name,$elid,$valtype) + +NOTE: never call this function yourself; it changes data structures +that are considered immutable outside of this package. + +Internal API function that generates the EBML Element Definitions. + +This API function returns an array which first element is C<$elid> +and the second is a generated hash. The generated hash is stored +in the @global_elem_list and %global_elem_dict. + +The generated hash contains: + +=for :list += name +The EBML Element's name, given through C<$name>. += elid +The EBML Element's hex id, given through C<$elid>. Used for lookups by L. += valtype +The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref. += multiple +If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L. Used to +mark elements that may be repeated. += subelements +An arrayref of elements that may be children of this element, given through C<$valtype> if it +is an arrayref. Sets L to C if there are subelements. += subids +An arrayref listing all the Ls of subelements, Cified. 
+
+The following elements are for mpv compatibility:
+
+=for :list
+= definename
+Name used for generating C #defines.
+= fieldname
+Name used for generating C struct fields.
+= structname
+Name used for generating C struct names.
+= ebmltype
+A pre-#defined constant to describe the element's type.
+= valname
+Typename used when declaring a struct field referring to this element.
+
+=cut
+sub elem {
+    my %e = (name => shift, elid => shift, valtype => shift);
+
+    # strip * from name, set 'multiple' if there was one
+    $e{multiple} = scalar $e{name} =~ s/\*$//;
+
+    # ELEM_DEFINE_TYPE is either MATROSKA or EBML
+    $e{definename} = "${ELEM_DEFINE_TYPE}_ID_".uc($e{name});
+    $e{fieldname} = uncamelize $e{name};
+    $e{structname} = "ebml_$e{fieldname}";
+
+    if (ref $e{valtype} eq 'HASH') {
+        # a hashref valtype is the { elid => definition } hash of the children
+        $e{subelements} = $e{valtype};
+        # wrap in [] so the key really holds an arrayref; assigning the bare
+        # call to a scalar runs uniq in scalar context and stores only a count
+        $e{subids} = [ uniq map { $_->{elid} } values %{$e{subelements}} ];
+        $e{valtype} = 'sub';
+        $e{ebmltype} = 'EBML_TYPE_SUBELEMENTS';
+        $e{valname} = "struct $e{structname}";
+    } else {
+        $e{ebmltype} = "EBML_TYPE_\U$e{valtype}";
+        # the assignment doubles as validation: unknown types have no C name
+        die "Unrecognized value type $e{valtype}" unless
+            defined ($e{valname} = TYPE_MAP->{$e{valtype}});
+    }
+    my $e = \%e;
+    # register the definition for both ordered iteration and lookup by id
+    push @Parse::Matroska::Definitions::global_elem_list, $e;
+    $Parse::Matroska::Definitions::global_elem_dict{$e{elid}} = $e;
+    # an (id, definition) pair, so calls can be listed inside a { ... } hash
+    return ($e{elid}, $e);
+}
+
+##############################################
+### EBML and Matroska document definitions ###
+##############################################
+
+=method define_ebml
+
+Internal function that defines the EBML generic grammar.
+
+Must not be called from outside the package.
+ +=cut +sub define_ebml { + local $ELEM_DEFINE_TYPE = 'EBML'; + return ( + elem('EBML', '1a45dfa3', { + elem('EBMLVersion', '4286', 'uint'), + elem('EBMLReadVersion', '42f7', 'uint'), + elem('EBMLMaxIDLength', '42f2', 'uint'), + elem('EBMLMaxSizeLength', '42f3', 'uint'), + elem('DocType', '4282', 'str'), + elem('DocTypeVersion', '4287', 'uint'), + elem('DocTypeReadVersion', '4285', 'uint'), + }), + + elem('CRC32', 'bf', 'binary'), + elem('Void', 'ec', 'binary'), + ); +} + + +=method define_matroska + +Internal function that defines the Matroska-specific EBML grammar. + +Must not be called from outside the package. + +=cut +sub define_matroska { + local $ELEM_DEFINE_TYPE = 'MATROSKA'; + return ( + elem('Segment', '18538067', { + elem('SeekHead*', '114d9b74', { + elem('Seek*', '4dbb', { + elem('SeekID', '53ab', 'ebml_id'), + elem('SeekPosition', '53ac', 'uint'), + }), + }), + + elem('Info*', '1549a966', { + elem('SegmentUID', '73a4', 'binary'), + elem('PrevUID', '3cb923', 'binary'), + elem('NextUID', '3eb923', 'binary'), + elem('TimecodeScale', '2ad7b1', 'uint'), + elem('DateUTC', '4461', 'sint'), + elem('Title', '7ba9', 'str'), + elem('MuxingApp', '4d80', 'str'), + elem('WritingApp', '5741', 'str'), + elem('Duration', '4489', 'float'), + }), + + elem('Cluster*', '1f43b675', { + elem('Timecode', 'e7', 'uint'), + elem('BlockGroup*', 'a0', { + elem('Block', 'a1', 'binary'), + elem('BlockDuration', '9b', 'uint'), + elem('ReferenceBlock*', 'fb', 'sint'), + }), + elem('SimpleBlock*', 'a3', 'binary'), + }), + + elem('Tracks*', '1654ae6b', { + elem('TrackEntry*', 'ae', { + elem('TrackNumber', 'd7', 'uint'), + elem('TrackUID', '73c5', 'uint'), + elem('TrackType', '83', 'uint'), + elem('FlagEnabled', 'b9', 'uint'), + elem('FlagDefault', '88', 'uint'), + elem('FlagForced', '55aa', 'uint'), + elem('FlagLacing', '9c', 'uint'), + elem('MinCache', '6de7', 'uint'), + elem('MaxCache', '6df8', 'uint'), + elem('DefaultDuration', '23e383', 'uint'), + elem('TrackTimecodeScale', 
'23314f', 'float'), + elem('MaxBlockAdditionID', '55ee', 'uint'), + elem('Name', '536e', 'str'), + elem('Language', '22b59c', 'str'), + elem('CodecID', '86', 'str'), + elem('CodecPrivate', '63a2', 'binary'), + elem('CodecName', '258688', 'str'), + elem('CodecDecodeAll', 'aa', 'uint'), + elem('Video', 'e0', { + elem('FlagInterlaced', '9a', 'uint'), + elem('PixelWidth', 'b0', 'uint'), + elem('PixelHeight', 'ba', 'uint'), + elem('DisplayWidth', '54b0', 'uint'), + elem('DisplayHeight', '54ba', 'uint'), + elem('DisplayUnit', '54b2', 'uint'), + elem('FrameRate', '2383e3', 'float'), + }), + elem('Audio', 'e1', { + elem('SamplingFrequency', 'b5', 'float'), + elem('OutputSamplingFrequency', '78b5', 'float'), + elem('Channels', '9f', 'uint'), + elem('BitDepth', '6264', 'uint'), + }), + elem('ContentEncodings', '6d80', { + elem('ContentEncoding*', '6240', { + elem('ContentEncodingOrder', '5031', 'uint'), + elem('ContentEncodingScope', '5032', 'uint'), + elem('ContentEncodingType', '5033', 'uint'), + elem('ContentCompression', '5034', { + elem('ContentCompAlgo', '4254', 'uint'), + elem('ContentCompSettings', '4255', 'binary'), + }), + }), + }), + }), + }), + + elem('Cues', '1c53bb6b', { + elem('CuePoint*', 'bb', { + elem('CueTime', 'b3', 'uint'), + elem('CueTrackPositions*', 'b7', { + elem('CueTrack', 'f7', 'uint'), + elem('CueClusterPosition', 'f1', 'uint'), + }), + }), + }), + + elem('Attachments', '1941a469', { + elem('AttachedFile*', '61a7', { + elem('FileDescription', '467e', 'str'), + elem('FileName', '466e', 'str'), + elem('FileMimeType', '4660', 'str'), + elem('FileData', '465c', 'binary'), + elem('FileUID', '46ae', 'uint'), + }), + }), + + elem('Chapters', '1043a770', { + elem('EditionEntry*', '45b9', { + elem('EditionUID', '45bc', 'uint'), + elem('EditionFlagHidden', '45bd', 'uint'), + elem('EditionFlagDefault', '45db', 'uint'), + elem('EditionFlagOrdered', '45dd', 'uint'), + elem('ChapterAtom*', 'b6', { + elem('ChapterUID', '73c4', 'uint'), + 
elem('ChapterTimeStart', '91', 'uint'), + elem('ChapterTimeEnd', '92', 'uint'), + elem('ChapterFlagHidden', '98', 'uint'), + elem('ChapterFlagEnabled', '4598', 'uint'), + elem('ChapterSegmentUID', '6e67', 'binary'), + elem('ChapterSegmentEditionUID', '6ebc', 'uint'), + elem('ChapterDisplay*', '80', { + elem('ChapString', '85', 'str'), + elem('ChapLanguage*', '437c', 'str'), + elem('ChapCountry*', '437e', 'str'), + }), + }), + }), + }), + elem('Tags*', '1254c367', { + elem('Tag*', '7373', { + elem('Targets', '63c0', { + elem('TargetTypeValue', '68ca', 'uint'), + elem('TargetTrackUID', '63c5', 'uint'), + elem('TargetEditionUID', '63c9', 'uint'), + elem('TargetChapterUID', '63c4', 'uint'), + elem('TargetAttachmentUID', '63c6', 'uint'), + }), + elem('SimpleTag*', '67c8', { + elem('TagName', '45a3', 'str'), + elem('TagLanguage', '447a', 'str'), + elem('TagString', '4487', 'str'), + }), + }), + }), + }), + ); +} + +1; diff --git a/TOOLS/lib/Parse/Matroska/Element.pm b/TOOLS/lib/Parse/Matroska/Element.pm new file mode 100644 index 0000000000..fa0830c11e --- /dev/null +++ b/TOOLS/lib/Parse/Matroska/Element.pm @@ -0,0 +1,331 @@ +use 5.008; +use strict; +use warnings; + +# ABSTRACT: a mid-level representation of an EBML element +package Parse::Matroska::Element; + +use Carp; +use List::Util qw{first}; + +=head1 SYNOPSIS + + use Parse::Matroska::Reader; + my $reader = Parse::Matroska::Reader->new($path); + my $elem = $reader->read_element; + + print "ID: $elem->{elid}\n"; + print "Name: $elem->{name}\n"; + print "Length: $elem->{content_len}\n"; + print "Type: $elem->{type}\n"; + print "Child count: ", scalar(@{$elem->all_children}), "\n"; + if ($elem->{type} eq 'sub') { + while (my $chld = $elem->next_child) { + print "Child Name: $chld->{name}\n"; + } + } else { + print "Value: ", $elem->get_value, "\n"; + } + +=head1 DESCRIPTION + +Represents a single Matroska element as decoded by +L. 
This is essentially a hash +augmented with functions for delay-loading of binary +values and children elements. + +=head1 NOTE + +The API of this module is not yet considered stable. + +=attr elid + +The EBML Element ID, suitable for passing to +L. + +=attr name + +The EBML Element's name. + +=attr type + +The EBML Element's type. Can be C, C, +C, C, C or C. See L +for details. + +Equivalent to +C{value})-E{valtype}>. + +=attr value + +The EBML Element's value. Should be obtained through +L. + +Is an unicode string if the L is C, that is, +the string has already been decoded by L. + +Is C if the L is C and the contents +were delay-loaded and not yet read. L will +do the delayed load if needed. + +Is an arrayref if the L is C, containing +the children nodes that were already loaded. + +Is a hashref if the L is C, containing +the referred element's information as defined in +L. Calling +C{value}-E{elid})> will +return the same object as $elem->{value}. + +=attr full_len + +The entire length of this EBML Element, including +the header's. + +=attr size_len + +The length of the size marker. Used when calculating +L from L + +=attr content_len + +The length of the contents of this EBML Element, +which excludes the header. + +=attr reader + +A weakened reference to the associated +L. + +=method new(%hash) + +Creates a new Element initialized with the hash +given as argument. + +=cut +sub new { + my $class = shift; + my $self = {}; + bless $self, $class; + + $self->initialize(@_); + return $self; +} + +=method initialize(%hash) + +Called by L on initialization. + +=cut +sub initialize { + my ($self, %args) = @_; + for (keys %args) { + $self->{$_} = $args{$_}; + } + $self->{depth} = 0 unless $self->{depth}; +} + +=method skip + +Called by the user to ignore the contents of this EBML node. +Needed when ignoring the children of a node. 
+
+=cut
+sub skip {
+    my ($self) = @_;
+    my $reader = $self->{reader};
+    return unless $reader; # we don't have to skip if there's no reader
+    my $pos = $reader->getpos;
+    # skipping is only valid while the reader still sits at the start of
+    # this element's data
+    croak "Too late to skip, reads were already done"
+        if $pos ne $self->{data_pos};
+    $reader->skip($self->{content_len});
+}
+
+=method get_value($keep_bin)
+
+Returns the value contained by this EBML element.
+
+If the element has children, returns an arrayref to
+the children elements that were already encountered.
+
+If the element's type is C<binary> and the value was
+delay-loaded, does the reading now.
+
+If $keep_bin is true, the delay-loaded data is kept
+as the L</value>, otherwise, further calls to
+C<get_value> will reread the data from the L</reader>.
+
+Returns C<undef> for C<skip> elements and when there is
+no value to return. Croaks if a value has to be loaded
+but the L</reader> is gone or cannot seek.
+
+=cut
+sub get_value {
+    my ($self, $keep_bin) = @_;
+
+    # elements without a known definition carry no type at all
+    my $type = defined $self->{type} ? $self->{type} : '';
+
+    return undef if $type eq 'skip';
+    # test definedness, not truth: 0, '' and a zero-valued Math::BigInt are
+    # legitimate decoded values and must be handed back as they are
+    return $self->{value} if defined $self->{value};
+
+    my $reader = $self->{reader} or
+        croak "The associated Reader has been deleted";
+
+    # delay-loaded 'binary'
+    if ($type eq 'binary') {
+        croak "Cannot seek in the current Reader" unless $self->{data_pos};
+        # seek to the data position...
+        $reader->setpos($self->{data_pos});
+        # read the data, keeping it in value if requested
+        if ($keep_bin) {
+            $self->{value} = $reader->readlen($self->{content_len});
+            return $self->{value};
+        } else {
+            return $reader->readlen($self->{content_len});
+        }
+    }
+
+    # nothing was decoded and nothing can be delay-loaded
+    return undef;
+}
+
+=method next_child($read_bin)
+
+Builtin iterator; reads and returns the next child element.
+Always returns undef if the type isn't C<sub>.
+
+Returns undef at the end of the iterator and resets itself to
+point to the first element; so calling L</next_child($read_bin)>
+after the iterator returned C<undef> will return the first child.
+
+The optional C<$read_bin> parameter has the children elements
+not delay-load their value if their type is C<binary>.
+
+If all children elements have already been read, return
+each element in-order as would be given by
+L</all_children($recurse,$read_bin)>.
+
+=cut
+sub next_child {
+    my ($self, $read_bin) = @_;
+    # only 'sub' elements have children; unknown elements have no type
+    return unless defined $self->{type} && $self->{type} eq 'sub';
+
+    # every child was already read: iterate over the cached list instead
+    if ($self->{_all_children_read}) {
+        my $idx = $self->{_last_child} ||= 0;
+        if ($idx == @{$self->{value}}) {
+            # reset the iterator, returning undef once
+            $self->{_last_child} = 0;
+            return;
+        }
+        my $ret = $self->{value}->[$idx];
+
+        ++$idx;
+        $self->{_last_child} = $idx;
+        return $ret;
+    }
+
+    # bytes of this element's content not yet consumed by children
+    my $len = defined $self->{remaining_len}
+        ? $self->{remaining_len}
+        : $self->{content_len};
+
+    if ($len == 0) {
+        # we've read all children; switch into $self->{value} iteration mode
+        $self->{_all_children_read} = 1;
+        # return undef since the iterator will reset
+        return;
+    }
+
+    # reposition to the first unread child: start of our data plus the
+    # total length of the children read so far
+    $self->{pos_offset} ||= 0;
+    my $pos = $self->{data_pos};
+    my $reader = $self->{reader} or croak "The associated reader has been deleted";
+    $reader->setpos($pos);
+    $reader->{fh}->seek($self->{pos_offset}, 1) if $pos;
+
+    my $chld = $reader->read_element($read_bin);
+    return undef unless defined $chld;
+    $self->{pos_offset} += $chld->{full_len};
+
+    $self->{remaining_len} = $len - $chld->{full_len};
+
+    if ($self->{remaining_len} < 0) {
+        # remaining_len is negative here; report the overrun as a positive count
+        my $overrun = -$self->{remaining_len};
+        croak "Child elements consumed $overrun more bytes than parent $self->{name} contained";
+    }
+
+    $chld->{depth} = $self->{depth} + 1;
+    $self->{value} ||= [];
+
+    push @{$self->{value}}, $chld;
+
+    return $chld;
+}
+
+=method all_children($recurse,$read_bin)
+
+Calls L</populate_children($recurse,$read_bin)> on self
+and returns an arrayref with the children nodes.
+
+Both C<$recurse> and C<$read_bin> are optional and default
+to false.
+
+=cut
+sub all_children {
+    my ($self, $recurse, $read_bin) = @_;
+    $self->populate_children($recurse, $read_bin);
+    return $self->{value};
+}
+
+=method children_by_name($name)
+
+Searches in the already read children elements for all
+elements with the EBML name C<$name>. Returns an array
+containing all found elements. On scalar context,
+returns only the first element found.
+
+Croaks if the element's C<type> isn't C<sub>.
+ +=cut +sub children_by_name { + my ($self, $name) = @_; + return unless defined wantarray; # don't do work if work isn't wanted + croak "Element can't have children" unless $self->{type} eq 'sub'; + + my @found = grep { $_->{name} eq $name } @{$self->{value}}; + return @found if wantarray; # list + return shift @found if defined wantarray; # scalar +} + +=method populate_children($recurse,$read_bin) + +Populates the internal array of children elements, that is, +requests that the associated L reads +all children elements. Returns itself. + +Returns false if the element's C isn't C. + +If C<$recurse> is provided and is true, the method will call +itself in the children elements with the same parameters it +received; this will build a full EBML tree. + +If C<$read_bin> is provided and is true, disables delay-loading +of the contents of C-type nodes, reading the contents +to memory. + +If both C<$recurse> and C<$read_bin> are true, entire EBML trees +can be loaded without requiring seeks, thus behaving correctly +on unseekable streams. If C<$read_bin> is false, the entire EBML +tree is still loaded, but calling L on C-type +nodes will produce an error on unseekable streams. 
+ +=cut +sub populate_children { + my ($self, $recurse, $read_bin) = @_; + + return unless $self->{type} eq 'sub'; + + if (@{$self->{value}} && $recurse) { + # only recurse + foreach (@{$self->{value}}) { + $_->populate_children($recurse, $read_bin); + } + return $self; + } + + while (my $chld = $self->next_child($read_bin)) { + $chld->populate_children($recurse, $read_bin) if $recurse; + } + + return $self; +} + +1; diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm new file mode 100644 index 0000000000..47e67ce5f7 --- /dev/null +++ b/TOOLS/lib/Parse/Matroska/Reader.pm @@ -0,0 +1,423 @@ +use 5.008; +use strict; +use warnings; + +# ABSTRACT: a low-level reader for EBML files +package Parse::Matroska::Reader; + +use Parse::Matroska::Definitions qw{elem_by_hexid}; +use Parse::Matroska::Element; + +use Carp; +use Scalar::Util qw{openhandle weaken}; +use IO::Handle; +use IO::File; +use List::Util qw{first}; +use Encode; + +use constant BIGINT_TRY => 'Pari,GMP,FastCalc'; +use Math::BigInt try => BIGINT_TRY; +use Math::BigRat try => BIGINT_TRY; + +=head1 SYNOPSIS + + use Parse::Matroska::Reader; + my $reader = Parse::Matroska::Reader->new($path); + $reader->close; + $reader->open(\$string_with_matroska_data); + + my $elem = $reader->read_element; + print "Element ID: $elem->{elid}\n"; + print "Element name: $elem->{name}\n"; + if ($elem->{type} ne 'sub') { + print "Element value: $elem->get_value\n"; + } else { + while (my $child = $elem->next_child) { + print "Child element: $child->{name}\n"; + } + } + $reader->close; + +=head1 DESCRIPTION + +Reads EBML data, which is used in Matroska files. +This is a low-level reader which is meant to be used as a backend +for higher level readers. TODO: write the high level readers :) + +=head1 NOTE + +The API of this module is not yet considered stable. + +=method new + +Creates a new reader. +Calls L with its arguments if provided. 
+ +=cut +sub new { + my $class = shift; + my $self = {}; + bless $self, $class; + + $self->open(@_) if @_; + return $self; +} + +=method open($arg) + +Creates the internal filehandle. The argument can be: + +=for :list +* An open filehandle or L object. +The filehandle is not Ced, so calling L in this +object will close the given filehandle as well. +* A scalar containing a path to a file. +* On perl v5.14 or newer, a scalarref pointing to EBML data. +For similar functionality in older perls, give an L object +or the handle to an already Ced scalarref. + +=cut +sub open { + my ($self, $arg) = @_; + $self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw") + or croak "Can't open $arg: $!"; +} + +=method close + +Closes the internal filehandle. + +=cut +sub close { + my ($self) = @_; + $self->{fh}->close; + delete $self->{fh}; +} + +# equivalent to $self->readlen(1), possibly faster +sub _getc { + my ($self) = @_; + my $c = $self->{fh}->getc; + croak "Can't do read of length 1: $!" if !defined $c && $!; + return $c; +} + +=method readlen($length) + +Reads C<$length> bytes from the internal filehandle. + +=cut +sub readlen { + my ($self, $len) = @_; + my $data; + my $readlen = $self->{fh}->read($data, $len); + croak "Can't do read of length $len: $!" 
+ unless defined $readlen; + return $data; +} + +# converts a byte string into an integer +# we do so by converting the integer into a hex string (big-endian) +# and then reading the hex-string into an integer +sub _bin2int($) { + my ($bin) = @_; + # if the length is larger than 3 + # the resulting integer might be larger than INT_MAX + if (length($bin) > 3) { + return Math::BigInt->from_hex(unpack("H*", $bin)); + } + return hex(unpack("H*", $bin)); +} + +# creates a floating-point number with the given mantissa and exponent +sub _ldexp { + my ($mantissa, $exponent) = @_; + return $mantissa * Math::BigRat->new(2)**$exponent; +} + +# NOTE: the read_* functions are hard to read because they're ports +# of even harder to read python functions. +# TODO: make them readable + +=method read_id + +Reads an EBML ID atom in hexadecimal string format, suitable +for passing to L. + +=cut +sub read_id { + my ($self) = @_; + my $t = $self->_getc; + return undef unless defined $t; + my $i = 0; + my $mask = 1<<7; + + if (ord($t) == 0) { + croak "Matroska Syntax error: first byte of ID was \\0" + } + until (ord($t) & $mask) { + ++$i; + $mask >>= 1; + } + # return hex string of the bytes we just read + return unpack "H*", ($t . $self->readlen($i)); +} + +=method read_size + +Reads an EBML Data Size atom, which immediately follows +an EBML ID atom. + +This returns an array consisting of: + +=for :list +0. The length of the Data Size atom. +1. The value encoded in the Data Size atom, which is the length of all the data following it. + +=cut +sub read_size { + my ($self) = @_; + my $t = $self->_getc; + my $i = 0; + my $mask = 1<<7; + + if (ord($t) == 0) { + croak "Matroska Syntax error: first byte of data size was \\0" + } + until (ord($t) & $mask) { + ++$i; + $mask >>= 1; + } + $t = $t & chr($mask-1); # strip length bits (keep only significant bits) + return ($i+1, _bin2int $t . 
$self->readlen($i)); +} + +=method read_str($length) + +Reads a string of length C<$length> bytes from the internal filehandle. +The string is already Ld from C, which is the +standard Matroska string encoding. + +=cut +{ + my $utf8 = find_encoding("UTF-8"); + sub read_str { + my ($self, $length) = @_; + return $utf8->decode($self->readlen($length)); + } +} + +=method read_uint($length) + +Reads an unsigned integer of length C<$length> bytes +from the internal filehandle. + +Returns a L object if C<$length> is greater +than 4. + +=cut +sub read_uint { + my ($self, $length) = @_; + return _bin2int $self->readlen($length); +} + +=method read_sint($length) + +Reads a signed integer of length C<$length> bytes +from the internal filehandle. + +Returns a L object if C<$length> is greater +than 4. + +=cut +sub read_sint { + my ($self, $length) = @_; + my $i = $self->read_uint($length); + + # Apply 2's complement to the unsigned int + my $mask = int(2 ** ($length * 8 - 1)); + # if the most significant bit is set... + if ($i & $mask) { + # subtract the MSB twice + $i -= 2 * $mask; + } + return $i; +} + +=method read_float($length) + +Reads an IEEE floating point number of length C<$length> +bytes from the internal filehandle. + +Only lengths C<4> and C<8> are supported (C C and C). 
+
+=cut
+sub read_float {
+    my ($self, $length) = @_;
+    my $i = $self->read_uint($length);
+    my $f;
+
+    use bigrat try => BIGINT_TRY;
+
+    # Reinterpret the unsigned integer as an IEEE 754 binary float:
+    #   value = (fraction + implicit leading 1) * 2**(exponent - bias - fraction bits)
+    # Every mask is spelled ((1<<N) - 1): binary "-" binds tighter than "<<",
+    # so an unparenthesised "1<<N - 1" means 1<<(N-1), a single bit, not a mask.
+    # NOTE: zero, subnormals, infinities and NaN are not special-cased here.
+    if ($length == 4) {
+        # binary32: 1 sign bit, 8 exponent bits, 23 fraction bits; 150 = 127 + 23
+        $f = _ldexp(($i & ((1<<23) - 1)) + (1<<23), ($i>>23 & ((1<<8) - 1)) - 150);
+        $f = -$f if $i & (1<<31);
+    } elsif ($length == 8) {
+        # binary64: 1 sign bit, 11 exponent bits, 52 fraction bits; 1075 = 1023 + 52
+        $f = _ldexp(($i & ((1<<52) - 1)) + (1<<52), ($i>>52 & ((1<<11) - 1)) - 1075);
+        $f = -$f if $i & (1<<63);
+    } else {
+        croak "Matroska Syntax error: unsupported IEEE float byte size $length";
+    }
+
+    return $f;
+}
+
+=method read_ebml_id($length)
+
+Reads an EBML ID when it's encoded as the data inside another
+EBML element, that is, when the enclosing element's C<type> is
+C<ebml_id>.
+
+This returns a hashref with the EBML element description as
+defined in L<Parse::Matroska::Definitions>.
+
+=cut
+sub read_ebml_id {
+    my ($self, $length) = @_;
+    # the raw bytes, hex-encoded, are the lookup key of the definition table
+    return elem_by_hexid(unpack("H*", $self->readlen($length)));
+}
+
+=method skip($length)
+
+Skips C<$length> bytes in the internal filehandle.
+
+=cut
+sub skip {
+    my ($self, $len) = @_;
+    # prefer a relative seek; fall back to reading and discarding the bytes
+    return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1);
+    $self->readlen($len);
+    return;
+}
+
+=method getpos
+
+Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle.
+
+Returns undef if the internal filehandle can't C<getpos>.
+
+=cut
+sub getpos {
+    my ($self) = @_;
+    return undef unless $self->{fh}->can('getpos');
+    return $self->{fh}->getpos;
+}
+
+=method setpos($pos)
+
+Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle.
+
+Returns C<undef> if the internal filehandle can't C<setpos>.
+
+Croaks if C<setpos> does not seek to the requested position,
+that is, if calling C<getpos> does not yield the same object
+as the C<$pos> argument.
+
+=cut
+sub setpos {
+    my ($self, $pos) = @_;
+    return undef unless $pos && $self->{fh}->can('setpos'); # no position given, or handle cannot seek
+
+    my $ret = $self->{fh}->setpos($pos);
+    croak "Cannot seek to correct position"
+        unless $self->getpos eq $pos; # confirm the handle really is at $pos now
+    return $ret;
+}
+
+=method read_element($read_bin)
+
+Reads a full EBML element from the internal filehandle.
+
+Returns a L<Parse::Matroska::Element> object initialized with
+the read data. If C<read_bin> is not present or is false, will
+delay-load the contents of C<binary> type elements, that is,
+they will only be loaded when calling C<get_value> on the
+returned L<Parse::Matroska::Element> object.
+
+Does not read the children of the element if its type is
+C<sub>. Look into the L<Parse::Matroska::Element> interface
+for details in how to read children elements.
+
+Pass a true C<$read_bin> if the stream being read is not
+seekable (C<getpos> is undef) and the contents of C<binary>
+elements is desired, otherwise seeking errors or internal
+filehandle corruption might occur.
+
+=cut
+sub read_element {
+    my ($self, $read_bin) = @_;
+    return undef if $self->{fh}->eof;
+
+    my $elem_pos = $self->getpos; # position of the element header, taken before anything is consumed
+
+    my $elid = $self->read_id;
+    my $elem_def = elem_by_hexid($elid); # undef when the ID has no known definition
+    my ($size_len, $content_len) = $self->read_size;
+    my $full_len = length($elid)/2 + $size_len + $content_len; # ID appears to be hex text: two characters per byte
+
+    my $elem = Parse::Matroska::Element->new(
+        elid => $elid,
+        name => $elem_def && $elem_def->{name},
+        type => $elem_def && $elem_def->{valtype}, # the definition's "valtype" is stored as the element's "type"
+        size_len => $size_len,
+        content_len => $content_len,
+        full_len => $full_len,
+        reader => $self,
+        elem_pos => $elem_pos,
+        data_pos => $self->getpos, # header consumed: this is where the content starts
+    );
+    weaken($elem->{reader}); # the element only holds a weak reference to its reader
+
+    if (defined $elem_def) {
+        if ($elem->{type} eq 'sub') {
+            $elem->{value} = []; # children are not read here
+        } elsif ($elem->{type} eq 'str') {
+            $elem->{value} = $self->read_str($content_len);
+        } elsif ($elem->{type} eq 'ebml_id') {
+            $elem->{value} = $self->read_ebml_id($content_len);
+        } elsif ($elem->{type} eq 'uint') {
+            $elem->{value} = $self->read_uint($content_len);
+        } elsif ($elem->{type} eq 'sint') {
+            $elem->{value} = $self->read_sint($content_len);
+        } elsif ($elem->{type} eq 'float') {
+            $elem->{value} = $self->read_float($content_len);
+        } elsif ($elem->{type} eq 'skip') {
+            $self->skip($content_len);
+        } elsif ($elem->{type} eq 'binary') {
+            if ($read_bin) {
+                $elem->{value} = $self->readlen($content_len);
+            } else {
+                $self->skip($content_len); # content is left unread; no value is stored
+            }
+        } else {
+            die "Matroska Definition error: type $elem->{type} unknown"
+        }
+    } else {
+        $self->skip($content_len); # unknown element: step over its content
+    }
+    return $elem;
+}
+
+1;
+
+=head1 CAVEATS
+
+Children elements have to be processed as soon as an element
+with children is found, or their children ignored with
+L<Parse::Matroska::Element/skip>. Not doing so doesn't cause
+errors but results in an invalid structure, with constant C<0>
+depth.
+
+To work correctly in unseekable streams, either the contents
+of C<binary>-type elements has to be ignored or the C<read_bin>
+flag to C<read_element> has to be true.
diff --git a/TOOLS/lib/Parse/Matroska/Utils.pm b/TOOLS/lib/Parse/Matroska/Utils.pm
new file mode 100644
index 0000000000..127d626cb1
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska/Utils.pm
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+
+# ABSTRACT: internally-used helper functions
+package Parse::Matroska::Utils;
+
+use Exporter;
+our @ISA = qw{Exporter};
+our @EXPORT_OK = qw{uniq uncamelize};
+
+=method uniq(@array)
+
+The same as L<List::MoreUtils/uniq>.
+Included to avoid depending on it since it's
+not a core module.
+
+=cut
+sub uniq(@) {
+    my %seen;
+    return grep { !$seen{$_}++ } @_; # keeps the first occurrence of each value, in input order
+}
+
+=method uncamelize($string)
+
+Converts a "StringLikeTHIS" into a
+"string_like_this".
+
+=cut
+sub uncamelize($) {
+    local $_ = shift;
+    # lc followed by UC: lc_UC
+    s/(?<=[a-z])([A-Z])/_\L$1/g;
+    # UC followed by two lc: _UClclc
+    s/([A-Z])(?=[a-z]{2})/_\L$1/g;
+    # strip leading _ that the second regexp might add; lowercase all
+    s/^_//; lc
+}
diff --git a/TOOLS/matroska.pl b/TOOLS/matroska.pl
new file mode 100755
index 0000000000..3ab06df6f9
--- /dev/null
+++ b/TOOLS/matroska.pl
@@ -0,0 +1,169 @@
+#! /usr/bin/env perl
+
+# Generate C definitions for parsing Matroska files.
+
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Parse::Matroska::Definitions;
+use Parse::Matroska::Reader;
+
+use Getopt::Long;
+use List::Util qw{max};
+
+my @global_elem_list = @Parse::Matroska::Definitions::global_elem_list;
+
+Getopt::Long::Configure(qw{auto_version auto_help});
+my %opt;
+GetOptions(\%opt,
+    "generate-header",
+    "generate-definitions",
+    "full",
+    );
+
+if ($opt{"generate-header"}) {
+    generate_c_header();
+} elsif ($opt{"generate-definitions"}) {
+    generate_c_definitions();
+} else {
+    for my $file (@ARGV) { # "-" means STDIN, which is read without seeking
+        my $reader = Parse::Matroska::Reader->new($file eq '-' ? \*STDIN : $file) or die $!;
+        while (my $elem = $reader->read_element($file eq '-')) {
+            process_elem($elem, $file eq '-');
+        }
+    }
+}
+
+# Generate declarations for libmpdemux/ebml_types.h
+sub generate_c_header {
+    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
+
+    # Write a #define for the ElementID of each known element
+    for my $el (@global_elem_list) {
+        printf "#define %-40s 0x%s\n", $el->{definename}, $el->{elid};
+    }
+    print "\n";
+
+    # Define a struct for each ElementID that has child elements
+    for my $el (@global_elem_list) {
+        next unless $el->{subelements};
+        print "\nstruct $el->{structname} {\n";
+
+        # Figure out the length of the longest variable name
+        # Used for pretty-printing in the next step
+        my $width = max(map { length $_->{valname} } values %{$el->{subelements}});
+
+        # Output each variable, with pointers for array (multiple) elements (sorted: hash order varies between runs)
+        for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) {
+            printf " %-${width}s %s%s;\n",
+                $subel->{valname}, $subel->{multiple}?'*':' ', $subel->{fieldname};
+        }
+        print "\n";
+
+        # Output a counter variable for each element
+        # (presence/absence for scalars, item count for arrays)
+        for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) {
+            print " int n_$subel->{fieldname};\n"
+        }
+        print "};\n";
+    }
+    print "\n";
+
+    # Output extern references for ebml_elem_desc structs for each of the elements
+    # These are defined by generate_c_definitions
+    for my $el (@global_elem_list) {
+        next unless $el->{subelements};
+        print "extern const struct ebml_elem_desc $el->{structname}_desc;\n";
+    }
+    print "\n";
+
+    # Output the max number of sub-elements a known element might have
+    printf "#define MAX_EBML_SUBELEMENTS %d\n",
+        max(map { scalar keys %{$_->{subelements}} }
+            grep { $_->{subelements} } @global_elem_list);
+}
+
+# Generate definitions for libmpdemux/ebml_defs.c
+sub generate_c_definitions {
+    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
+    # ebml_defs.c uses macros declared in ebml.c
+    for my $el (@global_elem_list) {
+        print "\n";
+        if ($el->{subelements}) {
+            # set N for the next macros
+            print "#define N $el->{fieldname}\n";
+
+            # define a struct ebml_$N_desc and gets ready to define fields
+            # this secretly opens two scopes; hence the }}; at the end
+            print "E_S(\"$el->{name}\", ".scalar(keys %{$el->{subelements}}).")\n";
+
+            # define a field for each subelement (sorted so the output is the same on every run)
+            # also does lots of macro magic, but doesn't open a scope
+            for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) {
+                print "F($subel->{definename}, $subel->{fieldname}, ".
+                    ($subel->{multiple}?'1':'0').")\n";
+            }
+            # close the struct
+            print "}};\n";
+
+            # unset N since we've used it
+            print "#undef N\n";
+        } else {
+            print "E(\"$el->{name}\", $el->{fieldname}, $el->{ebmltype})\n";
+        }
+    }
+}
+
+sub repr { # quote each argument for display; returns the list, or the last item in scalar context
+    my @ret;
+    foreach (my @copies = @_) { # iterate over copies so the caller's values are never modified
+        if (/'/) {
+            s/"/\\"/g; # value holds a single quote: use double quotes and escape embedded ones
+            push @ret, "\"$_\"";
+        } else {
+            push @ret, "'$_'";
+        }
+    }
+    return @ret if wantarray;
+    return pop @ret if defined wantarray;
+    return;
+}
+
+sub process_elem { # print one element in the dump format, recursing into its children
+    my ($elem, $read_bin) = @_;
+    die "process_elem called without an element" unless $elem; # check before the first dereference
+    unless (defined $elem->{type}) { # no definition known for this ID, so there is no name or type to show
+        print "$elem->{depth} Unknown element: $elem->{elid} size: $elem->{content_len}\n";
+        return;
+    }
+    if (!$opt{full} && ($elem->{name} eq 'Cluster' || $elem->{name} eq 'Cues')) {
+        $elem->skip; # these sections are only dumped when --full is given
+        return;
+    }
+
+    print "$elem->{depth} $elem->{elid} $elem->{name} size: $elem->{content_len} value: " if $elem->{type} ne 'skip';
+
+    if ($elem->{type} eq 'sub') {
+        print "subelements:\n";
+        while (my $chld = $elem->next_child($read_bin)) {
+            process_elem($chld, $read_bin); # keep the read-binary mode for nested elements
+        }
+    } elsif ($elem->{type} eq 'binary') {
+        my $t = "<skipped $elem->{content_len} bytes>";
+        if ($elem->{content_len} < 20) { # short payloads are shown as hex
+            $t = unpack "H*", $elem->get_value;
+        }
+        print "binary $t\n";
+        delete $elem->{value}; # drop the payload once it has been printed
+    } elsif ($elem->{type} eq 'ebml_id') {
+        print "binary $elem->{value}->{elid} (".($elem->{value}->{name}||"UNKNOWN").")\n";
+    } elsif ($elem->{type} eq 'skip') {
+        # skip
+    } elsif ($elem->{type} eq 'str') {
+        print "string ". repr($elem->get_value) . "\n";
+    } else {
+        print "$elem->{type} ". $elem->get_value ."\n";
+    }
+}
\ No newline at end of file
diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py
deleted file mode 100755
index 848b033cbd..0000000000
--- a/TOOLS/matroska.py
+++ /dev/null
@@ -1,429 +0,0 @@
-#!/usr/bin/env python
-"""
-Generate C definitions for parsing Matroska files.
-Can also be used to directly parse Matroska files and display their contents.
-"""
-
-#
-# This file is part of MPlayer.
-# -# MPlayer is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# MPlayer is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with MPlayer; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# - -# for compatibility with Python 2.x -from __future__ import print_function - -elements_ebml = ( - 'EBML, 1a45dfa3, sub', ( - 'EBMLVersion, 4286, uint', - 'EBMLReadVersion, 42f7, uint', - 'EBMLMaxIDLength, 42f2, uint', - 'EBMLMaxSizeLength, 42f3, uint', - 'DocType, 4282, str', - 'DocTypeVersion, 4287, uint', - 'DocTypeReadVersion, 4285, uint', - ), - - 'CRC32, bf, binary', - 'Void, ec, binary', -) - -elements_matroska = ( - 'Segment, 18538067, sub', ( - - 'SeekHead*, 114d9b74, sub', ( - 'Seek*, 4dbb, sub', ( - 'SeekID, 53ab, ebml_id', - 'SeekPosition, 53ac, uint', - ), - ), - - 'Info*, 1549a966, sub', ( - 'SegmentUID, 73a4, binary', - 'PrevUID, 3cb923, binary', - 'NextUID, 3eb923, binary', - 'TimecodeScale, 2ad7b1, uint', - 'DateUTC, 4461, sint', - 'Title, 7ba9, str', - 'MuxingApp, 4d80, str', - 'WritingApp, 5741, str', - 'Duration, 4489, float', - ), - - 'Cluster*, 1f43b675, sub', ( - 'Timecode, e7, uint', - 'BlockGroup*, a0, sub', ( - 'Block, a1, binary', - 'BlockDuration, 9b, uint', - 'ReferenceBlock*, fb, sint', - ), - 'SimpleBlock*, a3, binary', - ), - - 'Tracks*, 1654ae6b, sub', ( - 'TrackEntry*, ae, sub', ( - 'TrackNumber, d7, uint', - 'TrackUID, 73c5, uint', - 'TrackType, 83, uint', - 'FlagEnabled, b9, uint', - 'FlagDefault, 88, uint', - 'FlagForced, 55aa, 
uint', - 'FlagLacing, 9c, uint', - 'MinCache, 6de7, uint', - 'MaxCache, 6df8, uint', - 'DefaultDuration, 23e383, uint', - 'TrackTimecodeScale, 23314f, float', - 'MaxBlockAdditionID, 55ee, uint', - 'Name, 536e, str', - 'Language, 22b59c, str', - 'CodecID, 86, str', - 'CodecPrivate, 63a2, binary', - 'CodecName, 258688, str', - 'CodecDecodeAll, aa, uint', - 'Video, e0, sub', ( - 'FlagInterlaced, 9a, uint', - 'PixelWidth, b0, uint', - 'PixelHeight, ba, uint', - 'DisplayWidth, 54b0, uint', - 'DisplayHeight, 54ba, uint', - 'DisplayUnit, 54b2, uint', - 'FrameRate, 2383e3, float', - ), - 'Audio, e1, sub', ( - 'SamplingFrequency, b5, float', - 'OutputSamplingFrequency, 78b5, float', - 'Channels, 9f, uint', - 'BitDepth, 6264, uint', - ), - 'ContentEncodings, 6d80, sub', ( - 'ContentEncoding*, 6240, sub', ( - 'ContentEncodingOrder, 5031, uint', - 'ContentEncodingScope, 5032, uint', - 'ContentEncodingType, 5033, uint', - 'ContentCompression, 5034, sub', ( - 'ContentCompAlgo, 4254, uint', - 'ContentCompSettings, 4255, binary', - ), - ), - ), - ), - ), - - 'Cues, 1c53bb6b, sub', ( - 'CuePoint*, bb, sub', ( - 'CueTime, b3, uint', - 'CueTrackPositions*, b7, sub', ( - 'CueTrack, f7, uint', - 'CueClusterPosition, f1, uint', - ), - ), - ), - - 'Attachments, 1941a469, sub', ( - 'AttachedFile*, 61a7, sub', ( - 'FileDescription, 467e, str', - 'FileName, 466e, str', - 'FileMimeType, 4660, str', - 'FileData, 465c, binary', - 'FileUID, 46ae, uint', - ), - ), - - 'Chapters, 1043a770, sub', ( - 'EditionEntry*, 45b9, sub', ( - 'EditionUID, 45bc, uint', - 'EditionFlagHidden, 45bd, uint', - 'EditionFlagDefault, 45db, uint', - 'EditionFlagOrdered, 45dd, uint', - 'ChapterAtom*, b6, sub', ( - 'ChapterUID, 73c4, uint', - 'ChapterTimeStart, 91, uint', - 'ChapterTimeEnd, 92, uint', - 'ChapterFlagHidden, 98, uint', - 'ChapterFlagEnabled, 4598, uint', - 'ChapterSegmentUID, 6e67, binary', - 'ChapterSegmentEditionUID, 6ebc, uint', - 'ChapterDisplay*, 80, sub', ( - 'ChapString, 85, str', - 'ChapLanguage*, 
437c, str', - 'ChapCountry*, 437e, str', - ), - ), - ), - ), - 'Tags*, 1254c367, sub', ( - 'Tag*, 7373, sub', ( - 'Targets, 63c0, sub', ( - 'TargetTypeValue, 68ca, uint', - 'TargetTrackUID, 63c5, uint', - 'TargetEditionUID, 63c9, uint', - 'TargetChapterUID, 63c4, uint', - 'TargetAttachmentUID, 63c6, uint', - ), - 'SimpleTag*, 67c8, sub', ( - 'TagName, 45a3, str', - 'TagLanguage, 447a, str', - 'TagString, 4487, str' - ), - ), - ), - ), -) - - -import sys -from math import ldexp -from binascii import hexlify - -def byte2num(s): - return int(hexlify(s), 16) - -class EOF(Exception): pass - -def camelcase_to_words(name): - parts = [] - start = 0 - for i in range(1, len(name)): - if name[i].isupper() and (name[i-1].islower() or - name[i+1:i+2].islower()): - parts.append(name[start:i]) - start = i - parts.append(name[start:]) - return '_'.join(parts).lower() - -class MatroskaElement(object): - - def __init__(self, name, elid, valtype, namespace): - self.name = name - self.definename = '{0}_ID_{1}'.format(namespace, name.upper()) - self.fieldname = camelcase_to_words(name) - self.structname = 'ebml_' + self.fieldname - self.elid = elid - self.valtype = valtype - if valtype == 'sub': - self.ebmltype = 'EBML_TYPE_SUBELEMENTS' - self.valname = 'struct ' + self.structname - else: - self.ebmltype = 'EBML_TYPE_' + valtype.upper() - try: - self.valname = {'uint': 'uint64_t', 'str': 'struct bstr', - 'binary': 'struct bstr', 'ebml_id': 'uint32_t', - 'float': 'double', 'sint': 'int64_t', - }[valtype] - except KeyError: - raise SyntaxError('Unrecognized value type ' + valtype) - self.subelements = () - - def add_subelements(self, subelements): - self.subelements = subelements - self.subids = set(x[0].elid for x in subelements) - -elementd = {} -elementlist = [] -def parse_elems(l, namespace): - subelements = [] - for el in l: - if isinstance(el, str): - name, hexid, eltype = [x.strip() for x in el.split(',')] - multiple = name.endswith('*') - name = name.strip('*') - new = 
MatroskaElement(name, hexid, eltype, namespace) - elementd[hexid] = new - elementlist.append(new) - subelements.append((new, multiple)) - else: - new.add_subelements(parse_elems(el, namespace)) - return subelements - -parse_elems(elements_ebml, 'EBML') -parse_elems(elements_matroska, 'MATROSKA') - -def generate_C_header(): - print('// Generated by TOOLS/matroska.py, do not edit manually') - print() - - for el in elementlist: - print('#define {0.definename:40} 0x{0.elid}'.format(el)) - - print() - - for el in reversed(elementlist): - if not el.subelements: - continue - print() - print('struct {0.structname} {{'.format(el)) - l = max(len(subel.valname) for subel, multiple in el.subelements)+1 - for subel, multiple in el.subelements: - print(' {e.valname:{l}} {star}{e.fieldname};'.format( - e=subel, l=l, star=' *'[multiple])) - print() - for subel, multiple in el.subelements: - print(' int n_{0.fieldname};'.format(subel)) - print('};') - - for el in elementlist: - if not el.subelements: - continue - print('extern const struct ebml_elem_desc {0.structname}_desc;'.format( - el)) - - print() - print('#define MAX_EBML_SUBELEMENTS', max(len(el.subelements) - for el in elementlist)) - - - -def generate_C_definitions(): - print('// Generated by TOOLS/matroska.py, do not edit manually') - print() - for el in reversed(elementlist): - print() - if el.subelements: - print('#define N', el.fieldname) - print('E_S("{0}", {1})'.format(el.name, len(el.subelements))) - for subel, multiple in el.subelements: - print('F({0.definename}, {0.fieldname}, {1})'.format( - subel, int(multiple))) - print('}};') - print('#undef N') - else: - print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el)) - -def read(s, length): - t = s.read(length) - if len(t) != length: - raise EOF - return t - -def read_id(s): - t = read(s, 1) - i = 0 - mask = 128 - if ord(t) == 0: - raise SyntaxError - while not ord(t) & mask: - i += 1 - mask >>= 1 - t += read(s, i) - return t - -def read_vint(s): - t = 
read(s, 1) - i = 0 - mask = 128 - if ord(t) == 0: - raise SyntaxError - while not ord(t) & mask: - i += 1 - mask >>= 1 - t = bytes((ord(t) & (mask - 1),)) - t += read(s, i) - return i+1, byte2num(t) - -def read_str(s, length): - return read(s, length) - -def read_uint(s, length): - t = read(s, length) - return byte2num(t) - -def read_sint(s, length): - i = read_uint(s, length) - mask = 1 << (length * 8 - 1) - if i & mask: - i -= 2 * mask - return i - -def read_float(s, length): - t = read(s, length) - i = byte2num(t) - if length == 4: - f = ldexp((i & 0x7fffff) + (1 << 23), (i >> 23 & 0xff) - 150) - if i & (1 << 31): - f = -f - elif length == 8: - f = ldexp((i & ((1 << 52) - 1)) + (1 << 52), (i >> 52 & 0x7ff) - 1075) - if i & (1 << 63): - f = -f - else: - raise SyntaxError - return f - -def parse_one(s, depth, parent, maxlen): - elid = hexlify(read_id(s)).decode('ascii') - elem = elementd.get(elid) - if parent is not None and elid not in parent.subids and elid not in ('ec', 'bf'): - print('Unexpected:', elid) - if 1: - raise NotImplementedError - size, length = read_vint(s) - this_length = len(elid) / 2 + size + length - if elem is not None: - if elem.valtype != 'skip': - print(depth, elid, elem.name, 'size:', length, 'value:', end=' ') - if elem.valtype == 'sub': - print('subelements:') - while length > 0: - length -= parse_one(s, depth + 1, elem, length) - if length < 0: - raise SyntaxError - elif elem.valtype == 'str': - print('string', repr(read_str(s, length).decode('utf8', 'replace'))) - elif elem.valtype in ('binary', 'ebml_id'): - t = read_str(s, length) - dec = '' - if elem.valtype == 'ebml_id': - idelem = elementd.get(hexlify(t).decode('ascii')) - if idelem is None: - dec = '(UNKNOWN)' - else: - dec = '({0.name})'.format(idelem) - if len(t) < 20: - t = hexlify(t).decode('ascii') - else: - t = ''.format(len(t)) - print('binary', t, dec) - elif elem.valtype == 'uint': - print('uint', read_uint(s, length)) - elif elem.valtype == 'sint': - print('sint', 
read_sint(s, length)) - elif elem.valtype == 'float': - print('float', read_float(s, length)) - elif elem.valtype == 'skip': - read(s, length) - else: - raise NotImplementedError - else: - print(depth, 'Unknown element:', elid, 'size:', length) - read(s, length) - return this_length - -def parse_toplevel(s): - parse_one(s, 0, None, 1 << 63) - -if sys.argv[1] == '--generate-header': - generate_C_header() -elif sys.argv[1] == '--generate-definitions': - generate_C_definitions() -else: - s = open(sys.argv[1], "rb") - while 1: - start = s.tell() - try: - parse_toplevel(s) - except EOF: - if s.tell() != start: - raise Exception("Unexpected end of file") - break diff --git a/TOOLS/vdpau_functions.pl b/TOOLS/vdpau_functions.pl new file mode 100755 index 0000000000..8bab4e533b --- /dev/null +++ b/TOOLS/vdpau_functions.pl @@ -0,0 +1,74 @@ +#! /usr/bin/env perl + +# Generates vdpau_template.c + +use strict; +use warnings; + +sub camelize($) { + my $s = shift; + $s =~ s/(?:^|_)([a-z])/\u$1/g; + $s; +} + +print <) { + # strip whitespace, ignore anything after a '#' + $f =~ /^\s*(.*?)\s*(?:(? 1: - mp_name, vdpau_name = parts - else: - mp_name = parts[0] - vdpau_name = ''.join(part.capitalize() for part in mp_name.split('_')) - macro_name = mp_name.upper() - print('VDP_FUNCTION(Vdp%s, VDP_FUNC_ID_%s, %s)' % (vdpau_name, macro_name, m