use strict;
use xm::o;

# in this document, a "field" is a scalar, a "record" is a hash-ref, 
# and a "list" is an array of "record"s (i.e. array of hash-refs).
# these are used both for their xm-format and their perldata
# representation. The term "tab" refers to the xm representation,
# while "hash" refers to the perldata representation.

sub detect_fields
{
    my $text = shift;
    my $field = shift;
    
    $text =~ s{ <([_A-Z]+)\b[^<>]*> (?:.(?!</?\1\b))*. </\1\b[^<>]*> }
             { push @{$field}, $1; "" }gsex;
}

# call on the body of each tabrecord,
# returns the fieldhash containing the fields along with their xmtype.
# attributes in the field markups are lost.
sub mkrecord
{
  local $_ = shift;
  my %hash;

  s{ <([_A-Z]+)\b[^<>*> (?:.(?!</?\1\b))*. </\1\b[^<>]*> }
   { $hash{$1} = $2; "" }gsex;

  return \%hash;
}

# call on the text of a tablist.
# returns the hashlist with each item pointing to a fieldhash.
# attributes in the record-markups and field-markups are lost.
sub mklist
{
    local $_ = shift;
    my $itemsep;

    /^[^<>]*<([_A-Z]+)\b/sx && do { $itemsep = $1; };
    
    my @array;

    s{ ^<$itemsep\b[^<>]*> ((?:.(?!<\/?$itemsep\b))*.) <\/$itemsep\b[^<>]*>}
     {
        push @array, mkrecord($1);
     }gsex;

    return \@array;
}

# call on the text of a tablist.
# returns the hashlist with each item pointing to a fieldhash.
# attributes in the field-leaders are lost. 
# attributes in the record-markups are stored in a
# hashentry called "";
sub mklists
{
    local $_ = shift;
    my $itemsep;

    /^[^<>]*<([_A-Z]+)\b/sx && do { $itemsep = $1; };
    
    my @array;

    s{ ^<$itemsep\b([^<>]*)> ((?:.(?!<\/?$itemsep\b))*.) <\/$itemsep\b[^<>]*>}
     {
        push @array, fieldhash($2);
	$array[$#array]{""} = $1 if length $1;
     }gsex;

    return \@array;
}

# walk the hashlist. return the fieldhash that has
# the specified field set to the specified value.
# for multiple matches, the first index is returned.
# beware: for multiple lookups, create an indexhash!
# seealso: lookupall (returns a fieldarray of all matches)
sub lookup
{
    my ($list,$field,$value) = @_;
    my $k;

    for $k (@$list)
    {
        return $k if $$k{$field} eq $value;
    }
    return undef;
}

# walk the hashlist. create a hash that stores
# for each field-value the corresponding index in the list.
# if a values exists more than once, the first index is used.
# seealso: indexhashall (returns a hash of indexarrays)
sub indexhash
{
    my ($list,$field) = @_;
    my ($k,$i,%hash);

    $i = $#$list;
    while ($i)
    {
       --$i;
       if (defined $$list[$i]{$field})
       {
	  $hash{$$list[$i]{$field}} = $i;
       }
    }
    return \%hash;
}

# walk the hashlist. return an array of refs to the
# the fieldhashes where each of the fieldhashes has 
# the specified field set to the specified value.
# beware: for multiple lookups, create an indexhash!
sub lookupall
{
    my ($list,$field,$value) = @_;
    my $k;
    my $a;

    for $k (@$list)
    {
        push @$a, $k if $$k{$field} eq $value;
    }
    return $a;
}

# walk the hashlist. create a hash that stores
# for each field-value the corresponding index in an array.
# if a values exists more than once, the first index is used.
sub indexhashall
{
    my ($list,$field) = @_;
    my ($k,$i,%hash);

    $i = 0; 
    while ($i < $#$list)
    {
       ++$i;
       if (defined $$list[$i]{$field})
       {
          my $k = $$list[$i]{$filed};
          $hash{$k} = \[ ] if not defined $hash{$k}; # superfluous??
	  push @$hash{$k}, $i;
       }
    }
    return \%hash;
}

# scan the comandline, open everything that looks like a filename
# and convert the content to a tab::list storing the tab's array-ref 
# in the returned hash as ${"filename"}.  Everything that looks like an 
# option is going to set a $o{name}-value. The default is "" for an empty
# value. (uses: xm::o::argv_filehash)
sub argv_filehash 
{
  my $texthash = xm::o::argv_filehash(@_);
  my $datahash = { };
  my $f;

  for $f (keys %$texthash)
  {
	$$datahash{$f} = mklist($$texthash{$f});
  }

  return $datahash;
}

1;