package xm::pfe::wordset2index;
use strict;
use xm::o;
"
get a list of wordsets, scan them and prepare them to be pushed
into their own files, each file named wordset-<namewordset>. The
attribute wordsetname is used directly from the wordset-info.
Then walk the text of each wordsetfile, create an internal
database of all XREFWORDREF, make them filenamelike (url-index-conformant)
attach the url-name-tag to XREFWORDREF. When the xrefwordref-database
is complete, walk the text and attach to <XREF>-data the
appropriate href, complete with filename#indexname in html' url-encoding.
"}
# Split a document into one file per wordset and write a wordset index.
#
# Input:   the first argument is the complete document text.
# Effects: writes "wordset-<name>.xml" for every <ITEMWORDSET> that has a
#          non-empty <NAMEWORDSET>, and "<index>.xml" listing all of them.
#          Progress markers and write errors go to STDERR.
# Returns: the names of the files actually written, one per line.
#
# Steps:
#   1. cut every <ITEMWORDSET> out of the input, keyed by its NAMEWORDSET;
#   2. give every <XREFWORDREF> in a wordset an id "<name>.0<n>";
#   3. build a global word => "wordset-<name>.html#<id>" table
#      (the first wordset defining a word wins);
#   4. add an href to every <XREF>: local anchor, else the global table,
#      else the index page;
#   5. write the wordset files and the index file.
my $in = shift;
my $out = "";
my $wordset = {};   # name => { text, desc, name => { word => id }, refs }
my $word_id = {};   # word => "wordset-<name>.html#<id>"
my $name;
my @keys;           # wordset names in document order, each listed once
my $index = "index-wordsets";
# %o is not declared in this block; presumably the option hash provided by
# xm::o - TODO confirm.
$index = $o{"index-wordsets"} if exists $o{"index-wordsets"};
print STDERR "<$index> ";

# Return the content of the first <NAMEWORDSET> element, or "" if none.
my $getNAMEWORDSET = sub {
    if ($_[0] =~ m{ <NAMEWORDSET(?:\s[^<>]*)?>
    ((?:.(?!</NAMEWORDSET[\s>]))*.)
    </NAMEWORDSET(?:\s[^<>]*)?> }sx)
    { return $1; }
    else { return ""; }
};

# Return the content of the first <CSTRWORDCNT> element, or "" if none.
my $getCSTRWORDCNT = sub {
    if ($_[0] =~ m{ <CSTRWORDCNT(?:\s[^<>]*)?>
    ((?:.(?!</CSTRWORDCNT[\s>]))*.)
    </CSTRWORDCNT(?:\s[^<>]*)?> }sx)
    { return $1; }
    else { return ""; }
};

# Step 1: remove each ITEMWORDSET from the input and remember its text.
# A name that occurs twice keeps the later text but is queued only once,
# so the later passes never decorate or list the same wordset twice.
$in =~ s{ (<ITEMWORDSET(?:\s[^<>]*)?>)
((?:.(?!</?ITEMWORDSET[\s>]))*.) (</ITEMWORDSET(?:\s[^<>]*)?>)
}
{
$out = $1.$2.$3;
$name = $getNAMEWORDSET->($out); if (length $name)
{
push @keys, $name unless exists $wordset->{$name};
$wordset->{$name}{text} = $out;
$wordset->{$name}{desc} = $getCSTRWORDCNT->($out);
};
""
}gsex;

# Step 2: number the XREFWORDREF definitions of each wordset.
for my $key (@keys)
{
    my $i = 1;
    $wordset->{$key}{text} =~
    s{ (<XREFWORDREF(?:\s[^<>]*)?)>
    ((?:.(?!</?XREFWORDREF[\s>]))*.) (</XREFWORDREF(?:\s[^<>]*)?>)
    }
    {
    $wordset->{$key}{name}{$2} = "$key.0$i";
    $out = $1." id=\"$key.0$i\" >".$2.$3;
    $i++;
    $out
    }gsex;
    $wordset->{$key}{refs} = $i - 1;
}

# Step 3: global lookup table; a wordset without definitions has no
# {name} hash, hence the "|| {}" guard.
for my $key (@keys)
{
    for my $word (keys %{ $wordset->{$key}{name} || {} })
    {
        if (not exists $word_id->{$word})
        {
            $word_id->{$word} = "wordset-$key.html#".$wordset->{$key}{name}{$word};
        }
    }
}

# Step 4: attach href attributes to the XREF elements. The closing tag
# also receives " href", matching the closing-tag form used for
# NAMEWORDSET in the index below.
for my $key (@keys)
{
    my $href;
    $wordset->{$key}{text} =~
    s{ (<XREF)(\s[^<>]*)?>
    ((?:.(?!</?XREF[\s>]))*.) (</XREF)(\s[^<>]*)?>
    }
    {
    if (exists $wordset->{$key}{name}{$3})
    {
        $href = "#".$wordset->{$key}{name}{$3};
    } elsif (exists $word_id->{$3})
    {
        $href = $word_id->{$3};
    } else
    {
        $href = "$index.html";
    };
    $1." href=\"".$href."\"".$2.">".$3.$4." href".$5.">"
    }gsex;
}

# Step 5: write the files. Three-argument open with lexical handles keeps
# a wordset name taken from the document from being read as an open mode
# or pipe. A failed write is reported and skipped; the run continues.
$out = "";
my $idx = "";
for my $key (@keys)
{
    $idx .= "<ITEMFILEWORDSET>"
    ."<NAMEWORDSET href=\"wordset-$key.html\">\n".$key."</NAMEWORDSET href>\n"
    ."<CSTRWORDCNT>".$wordset->{$key}{desc}."</CSTRWORDCNT>\n"
    ."<XREFDEFCNT>".$wordset->{$key}{refs}."</XREFDEFCNT>\n"
    ."</ITEMFILEWORDSET>";
    my $file = "wordset-$key.xml";
    my $fh;
    unless (open $fh, '>', $file)
    {
        print STDERR "cannot write $file: $!\n";
        next;
    }
    print $fh $wordset->{$key}{text};
    close $fh or print STDERR "cannot close $file: $!\n";
    $out .= $file."\n";
}
my $index_file = "$index.xml";
my $index_fh;
if (open $index_fh, '>', $index_file)
{
    print $index_fh $idx;
    close $index_fh or print STDERR "cannot close $index_file: $!\n";
    $out .= $index_file."\n";
}
else
{
    print STDERR "cannot write $index_file: $!\n";
}
print STDERR "</$index>\n";
return $out;
}
# Pass the caller's arguments plus the DESC value to xm::o::args_stdin
# and return its result unchanged. (The enclosing sub header is not
# visible here; args_stdin presumably resolves the arguments into the
# input text - TODO confirm against xm::o.)
return xm::o::args_stdin(@_,DESC); }
# Obtain the input via xm::o::args_stdin (given the caller's arguments
# and the DESC value), hand it to DO, and return whatever DO returns.
# (The enclosing sub header is not visible here.)
return DO(xm::o::args_stdin(@_,DESC)); }
1;