tab | sub | tabs |
use strict; use xm::o; # in this document, a "field" is a scalar, a "record" is a hash-ref, # and a "list" is an array of "record"s (i.e. array of hash-refs). # these are used both for their xm-format and their perldata # representation. The term "tab" refers to the xm representation, # while "hash" refers to the perldata representation. sub detect_fields { my $text = shift; my $field = shift; $text =~ s{ <([_A-Z]+)\b[^<>]*> (?:.(?!</?\1\b))*. </\1\b[^<>]*> } { push @{$field}, $1; "" }gsex; } # call on the body of each tabrecord, # returns the fieldhash containing the fields along with their xmtype. # attributes in the field markups are lost. sub mkrecord { local $_ = shift; my %hash; s{ <([_A-Z]+)\b[^<>*> (?:.(?!</?\1\b))*. </\1\b[^<>]*> } { $hash{$1} = $2; "" }gsex; return \%hash; } # call on the text of a tablist. # returns the hashlist with each item pointing to a fieldhash. # attributes in the record-markups and field-markups are lost. sub mklist { local $_ = shift; my $itemsep; /^[^<>]*<([_A-Z]+)\b/sx && do { $itemsep = $1; }; my @array; s{ ^<$itemsep\b[^<>]*> ((?:.(?!<\/?$itemsep\b))*.) <\/$itemsep\b[^<>]*>} { push @array, mkrecord($1); }gsex; return \@array; } # call on the text of a tablist. # returns the hashlist with each item pointing to a fieldhash. # attributes in the field-leaders are lost. # attributes in the record-markups are stored in a # hashentry called ""; sub mklists { local $_ = shift; my $itemsep; /^[^<>]*<([_A-Z]+)\b/sx && do { $itemsep = $1; }; my @array; s{ ^<$itemsep\b([^<>]*)> ((?:.(?!<\/?$itemsep\b))*.) <\/$itemsep\b[^<>]*>} { push @array, fieldhash($2); $array[$#array]{""} = $1 if length $1; }gsex; return \@array; } # walk the hashlist. return the fieldhash that has # the specified field set to the specified value. # for multiple matches, the first index is returned. # beware: for multiple lookups, create an indexhash! # seealso: lookupall (returns a fieldarray of all matches) sub lookup { my ($list,$field,$value) = @_; my $k; for $k (@$list) { return $k if $$k{$field} eq $value; } return undef; } # walk the hashlist. create a hash that stores # for each field-value the corresponding index in the list. # if a values exists more than once, the first index is used. # seealso: indexhashall (returns a hash of indexarrays) sub indexhash { my ($list,$field) = @_; my ($k,$i,%hash); $i = $#$list; while ($i) { --$i; if (defined $$list[$i]{$field}) { $hash{$$list[$i]{$field}} = $i; } } return \%hash; } # walk the hashlist. return an array of refs to the # the fieldhashes where each of the fieldhashes has # the specified field set to the specified value. # beware: for multiple lookups, create an indexhash! sub lookupall { my ($list,$field,$value) = @_; my $k; my $a; for $k (@$list) { push @$a, $k if $$k{$field} eq $value; } return $a; } # walk the hashlist. create a hash that stores # for each field-value the corresponding index in an array. # if a values exists more than once, the first index is used. sub indexhashall { my ($list,$field) = @_; my ($k,$i,%hash); $i = 0; while ($i < $#$list) { ++$i; if (defined $$list[$i]{$field}) { my $k = $$list[$i]{$filed}; $hash{$k} = \[ ] if not defined $hash{$k}; # superfluous?? push @$hash{$k}, $i; } } return \%hash; } # scan the comandline, open everything that looks like a filename # and convert the content to a tab::list storing the tab's array-ref # in the returned hash as ${"filename"}. Everything that looks like an # option is going to set a $o{name}-value. The default is "" for an empty # value. (uses: xm::o::argv_filehash) sub argv_filehash { my $texthash = xm::o::argv_filehash(@_); my $datahash = { }; my $f; for $f (keys %$texthash) { $$datahash{$f} = mklist($$texthash{$f}); } return $datahash; } 1;