package xm::pfe::wordset2index;
use strict;
use xm::o;

# DESC: return the human-readable description of this processing step.
# The text is a fixed literal; it is handed to xm::o::args_stdin by
# ARGS and main below.
sub DESC
{
    # The description starts with an empty line, hence the leading "\n".
    # The heredoc is single-quoted so nothing inside it is interpolated.
    return "\n" . <<'END_OF_DESC';
  get a list of wordsets, scan them and prepare them to be pushed
  into their own files, each file named wordset-<namewordset>. The
  attribute wordsetname is used directly from the wordset-info.
  Then walk the text of each wordsetfile, create an internal
  database of all XREFWORDREF, make them filenamelike (url-index-conformant)
  attach the url-name-tag to XREFWORDREF. When the xrefwordref-database
  is complete, walk the text and attach to <XREF>-data the
  appropriate href, complete with filename#indexname in html' url-encoding.
END_OF_DESC
}

# DO: split the input text into per-wordset files plus one index file.
#
# Argument:
#   the text to scan (first element of @_); it is expected to contain
#   <ITEMWORDSET>...</ITEMWORDSET> elements, each carrying a
#   <NAMEWORDSET> and optionally a <CSTRWORDCNT> element.
#
# Options:
#   $o{"index-wordsets"} - base name of the index file
#   (default "index-wordsets").
#
# Side effects:
#   writes "wordset-<name>.xml" for every named wordset and
#   "<index>.xml" into the current directory; prints progress and
#   problem markers to STDERR.
#
# Returns:
#   a string with the names of all files that could be opened for
#   writing, one per line.
#
# Note: the generated href attributes point at "wordset-<name>.html" and
# "<index>.html" while the files written here end in ".xml".
# NOTE(review): presumably a later step converts them - not visible here.
sub DO
{
    my $in = shift;
    my %wordset;   # name => { text, desc, refs, name => { word => id } }
    my %word_id;   # word => "wordset-<name>.html#<id>", first definition wins
    my @keys;      # wordset names in order of first appearance, no duplicates
    my $index = "index-wordsets";
    $index = $o{"index-wordsets"} if exists $o{"index-wordsets"};

    print STDERR "<$index> ";

    # Return the content of the first <$tag ...>...</$tag ...> element in
    # $text, or "" when there is none (or when it is empty).
    my $element_content = sub {
        my ($tag, $text) = @_;
        if ($text =~ m{ <(?:$tag)(?:\s[^<>]*)?>
                        ((?:.(?!</(?:$tag)[\s>]))*.)
                        </(?:$tag)(?:\s[^<>]*)?> }sx)
        { return $1; }
        return "";
    };

    # stage 1 : grab the wordsets

    while ($in =~ m{ (<ITEMWORDSET(?:\s[^<>]*)?>
                      (?:.(?!</?ITEMWORDSET[\s>]))*.
                      </ITEMWORDSET(?:\s[^<>]*)?>) }gsx)
    {
        my $item = $1;
        my $name = $element_content->("NAMEWORDSET", $item);
        next if not length $name;

        if (exists $wordset{$name})
        {
            # A repeated name must not be queued twice: the later stages
            # would then run twice over the same text and attach every
            # id/href attribute twice. As before, the last text wins.
            print STDERR "<!duplicate wordset $name> ";
        }
        else
        {
            push @keys, $name;
        }
        $wordset{$name}{text} = $item;
        $wordset{$name}{desc} = $element_content->("CSTRWORDCNT", $item);
    }

    # stage 2 : grab XREFWORDREFs, enumerate and a-name them.

    for my $key (@keys)
    {
        my $count = 0;
        $wordset{$key}{text} =~
            s{ (<XREFWORDREF(?:\s[^<>]*)?)>
               ((?:.(?!</?XREFWORDREF[\s>]))*.) (</XREFWORDREF(?:\s[^<>]*)?>)
             }
        {
            $count++;
            my $id = "$key.0$count";
            $wordset{$key}{name}{$2} = $id;
            $1." id=\"$id\" >".$2.$3
        }gsex;
        $wordset{$key}{refs} = $count;
    }

    # stage 3 : build a table of XREFWORDREFs across all wordsets

    for my $key (@keys)
    {
        my $names = $wordset{$key}{name} || {};
        for my $word (keys %$names)
        {
            next if exists $word_id{$word};
            $word_id{$word} = "wordset-$key.html#".$$names{$word};
        }
    }

    # stage 4 : resolve XREFs, prefer local defs, then the global wordref.

    for my $key (@keys)
    {
        $wordset{$key}{text} =~
            s{ (<XREF)(\s[^<>]*)?>
               ((?:.(?!</?XREF[\s>]))*.) (</XREF)(\s[^<>]*)?>
             }
        {
            my $target;
            if (exists $wordset{$key}{name}{$3})
            {
                $target = "#".$wordset{$key}{name}{$3};
            }
            elsif (exists $word_id{$3})
            {
                $target = $word_id{$3};
            }
            else
            {
                $target = "$index.html";
            }
            $1." href=\"".$target."\"".(defined $2 ? $2 : "").">"
                .$3.$4." href".(defined $5 ? $5 : "").">"
        }gsex;
    }

    # stage 5 : create the wordset html-xml's and index-wordset.xml

    my $out = ""; # the return of this sub is the index-list.
    my $idx = ""; # the index-file
    for my $key (@keys)
    {
        $idx .= "<ITEMFILEWORDSET>"
            ."<NAMEWORDSET  href=\"wordset-$key.html\">\n".$key."</NAMEWORDSET href>\n"
            ."<CSTRWORDCNT>".$wordset{$key}{desc}."</CSTRWORDCNT>\n"
            ."<XREFDEFCNT>".$wordset{$key}{refs}."</XREFDEFCNT>\n"
            ."</ITEMFILEWORDSET>";

        my $file = "wordset-$key.xml";
        my $fh;
        # Three-argument open: the name is built from input data and must
        # never be interpreted as an open mode or a pipe.
        if (not open $fh, ">", $file)
        {
            # Still best-effort: report the file and go on with the rest.
            print STDERR "<!cannot write $file: $!> ";
            next;
        }
        print $fh $wordset{$key}{text};
        close $fh or print STDERR "<!cannot close $file: $!> ";
        $out .= $file."\n";
    }

    my $file = "$index.xml";
    my $fh;
    if (open $fh, ">", $file)
    {
        print $fh $idx;
        close $fh or print STDERR "<!cannot close $file: $!> ";
        $out .= $file."\n";
    }
    else
    {
        print STDERR "<!cannot write $file: $!> ";
    }

    print STDERR "</$index>\n";
    return $out;
}

# ARGS: pass the caller's arguments plus this module's description text
# to xm::o::args_stdin and return whatever that yields.
# NOTE(review): args_stdin is defined in xm::o and not visible here; judging
# by its name it resolves the input text from the arguments or stdin - confirm.
sub ARGS
{
    return xm::o::args_stdin(@_, DESC());
}
# main: entry point of this step. Whatever xm::o::args_stdin produces from
# the caller's arguments and the description text is handed to DO, and
# DO's result (the list of written files) is returned.
sub main
{
    return DO(xm::o::args_stdin(@_, DESC()));
}

1;