package xm::pfe::wordset2words;
use strict;
use xm::o;
"
this script is derived from wordset2index
- but instead of generating one xml-page per wordset, there is now
one page per export-entry. However the filenames are along the
lines of the wordset, and therefore the grab/index process is
widely identical. We just need another step in the middle to
cut out the export-entries. resolve/spitout are very similar
after that. Therefore just like wordset2index, we will...
get a list of wordsets, walk the text of each wordsetfile,
create an internal database of all XREFWORDREF and of course
the definition, make the xrefs filenamelike to get them spit out
a bit later. When the xrefwordref-database is complete, walk
the texts and attach to <XREF>-data the appropriate href to have
an exact cross-reference in html' url-encoding.
"}
# Working state for the conversion.  The first argument is the input text;
# the remaining variables start out empty and are filled by the code below.
my $in  = shift;
my $out = "";
my ($wordset, $word_id);    # used as hash references further down
my ($key, @keys);
my ($i, $w, $word);

# Base name of the index page: the "index-words" entry of %o when one
# exists, otherwise the literal default.
my $index = exists $o{"index-words"} ? $o{"index-words"} : "index-words";

# Opening progress marker on STDERR.
print STDERR "<$index> ";
# Extracts the content of a <NAMEWORDSET> element from the string passed as
# first argument.  The element may carry attributes on either tag and its
# content must be at least one character long.  Returns the content, or ""
# when nothing matches.
my $getNAMEWORDSET = sub {
    return "" unless $_[0] =~ m{ <NAMEWORDSET(?:\s[^<>]*)?>
((?:.(?!</?NAMEWORDSET[\s>]))*.)
</NAMEWORDSET(?:\s[^<>]*)?> }sx;
    return $1;
};
# Extracts the content of a <CSTRWORDCNT> element from the string passed as
# first argument.  The element may carry attributes on either tag and its
# content must be at least one character long.  Returns the content, or ""
# when nothing matches.
my $getCSTRWORDCNT = sub {
    return "" unless $_[0] =~ m{ <CSTRWORDCNT(?:\s[^<>]*)?>
((?:.(?!</?CSTRWORDCNT[\s>]))*.)
</CSTRWORDCNT(?:\s[^<>]*)?> }sx;
    return $1;
};
# Extracts the content of an <XREFWORDREF> element from the string passed as
# first argument.  The element may carry attributes on either tag and its
# content must be at least one character long.  Returns the content, or ""
# when nothing matches.
my $getXREFWORDREF = sub {
    return "" unless $_[0] =~ m{ <XREFWORDREF(?:\s[^<>]*)?>
((?:.(?!</?XREFWORDREF[\s>]))*.)
</XREFWORDREF(?:\s[^<>]*)?> }sx;
    return $1;
};
# Finds an <XDEFSTACK> element in the string passed as first argument and
# returns it complete, i.e. opening tag, content and closing tag.  The
# content must be at least one character long.  Returns "" when nothing
# matches.
my $useXDEFSTACK = sub {
    return "" unless $_[0] =~ m{ (<XDEFSTACK(?:\s[^<>]*)?>)
((?:.(?!</?XDEFSTACK[\s>]))*.)
(</XDEFSTACK(?:\s[^<>]*)?>) }sx;
    return join "", $1, $2, $3;
};
# Cut every <ITEMWORDSET> element out of $in and file it under the name found
# in its <NAMEWORDSET>: {text} receives the complete element, {desc} the
# content of its <CSTRWORDCNT>.  Elements without a name are dropped.
#
# A name is appended to @keys only the first time it is seen.  The loops
# further down run once per @keys entry over the stored text, so a repeated
# name would otherwise be processed twice (second href on every <XREF>,
# pages written twice, duplicate index entries).  As before, the text of the
# last element carrying a given name is the one that is kept.
my %seen_wordset;
$in =~ s{ (<ITEMWORDSET(?:\s[^<>]*)?>)
((?:.(?!</?ITEMWORDSET[\s>]))*.) (</ITEMWORDSET(?:\s[^<>]*)?>)
}
{
    $out = $1.$2.$3;
    $key = $getNAMEWORDSET->($out);
    if (length $key)
    {
        push @keys, $key unless $seen_wordset{$key}++;
        $$wordset{$key}{text} = $out;
        $$wordset{$key}{desc} = $getCSTRWORDCNT->($out);
    }
    ""
}gsex;
# Give every <XREFWORDREF> of a wordset a page name of the form
# "w-<wordset>-0<n>.html", n counting from 1 within the wordset, stored in
# {name} under the element's content.  {refs} receives the number of
# elements seen.  The text is only scanned here, never changed.
for $key (@keys)
{
    $i = 1;
    # m//g in scalar context steps from one match to the next.
    while ($wordset->{$key}{text} =~
m{ (<XREFWORDREF(?:\s[^<>]*)?>)
((?:.(?!</?XREFWORDREF[\s>]))*.) (</XREFWORDREF(?:\s[^<>]*)?>)
}gsx)
    {
        $wordset->{$key}{name}{$2} = "w-$key-0$i.html";
        $i++;
    }
    $wordset->{$key}{refs} = $i - 1;
}
# Merge the per-wordset page names into the single lookup table $word_id.
# Wordsets are visited in @keys order and an existing entry is never
# replaced, so the first wordset that has a name supplies its page.
for my $set (@keys)
{
    for my $ref (keys %{ $wordset->{$set}{name} })
    {
        next if exists $word_id->{$ref};
        $word_id->{$ref} = $wordset->{$set}{name}{$ref};
    }
}
# Attach an href to every <XREF> element of every wordset text.  The target
# is looked up by the element's content: first among the names of the same
# wordset, then in the global $word_id table, and failing both it is the
# index page.  The opening tag gets href="target" placed before its own
# attributes, the closing tag gets a bare " href" placed before its own.
for my $set (@keys)
{
    $wordset->{$set}{text} =~
s{ (<XREF)(\s[^<>]*)?>
((?:.(?!</?XREF[\s>]))*.) (</XREF)(\s[^<>]*)?>
}
{
    my $target = exists $wordset->{$set}{name}{$3} ? $wordset->{$set}{name}{$3}
               : exists $word_id->{$3}             ? $word_id->{$3}
               :                                     "$index.html";
    $out = join "", $1, " href=\"", $target, "\"", $2, ">",
                    $3, $4, " href", $5, ">";
    $out
}gsex;
}
# Remove every <ITEMWORDREF> element from the wordset texts.  An element
# whose <XREFWORDREF> content has a page name in the same wordset is kept in
# $word, under wordset and name, as {file} (the page name) and {text} (the
# complete element).  All other elements are simply discarded.
for my $set (@keys)
{
    $wordset->{$set}{text} =~
s{ (<ITEMWORDREF(?:\s[^<>]*)?>)
((?:.(?!</?ITEMWORDREF[\s>]))*.)
(</ITEMWORDREF(?:\s[^<>]*)?>)
}
{
    my $entry = $1.$2.$3;
    my $name  = $getXREFWORDREF->($entry);
    if (length $name and exists $wordset->{$set}{name}{$name})
    {
        @{ $word->{$set}{$name} }{qw(file text)}
            = ($wordset->{$set}{name}{$name}, $entry);
    }
    ""
}gsex;
}
# Write one XML page per extracted word and collect the index entries.
#
# For every word an <ITEMWORDENTRY> is appended to $idx under the key
# "word<n>"; n is a running number that keeps equal words from different
# wordsets apart.  The page itself goes to the word's page name with the
# .html suffix replaced by .xml and holds a <title> line followed by the
# stored element text.  Every file that could be opened is appended to $out,
# one name per line.
#
# A page that cannot be opened is still skipped, but the failure is now
# reported on STDERR instead of passing silently.
$i = 1;
$out = ""; my $idx; my $F;
for $key (@keys)
{
    # Sorted, so that the running number and the order of the returned file
    # list do not depend on hash ordering.
    for $w (sort keys %{$$word{$key}})
    {
        $F = $$word{$key}{$w}{file};
        $$idx{"$w<$i>"} .= "<ITEMWORDENTRY>\n"
            ."<XREFWORDREF href=\"$F\">".$w."</XREFWORDREF href>\n"
            .&$useXDEFSTACK($$word{$key}{$w}{text})."\n"
            ."<NAMEWORDSETINFO>"." -- "."</NAMEWORDSETINFO>\n"
            ."<NAMEWORDSET>".$key."</NAMEWORDSET>\n"
            ."</ITEMWORDENTRY>\n";
        $i++;

        # Dot escaped: only a literal ".html" suffix is rewritten.
        $F =~ s/\.html$/.xml/;

        # Three-argument open with a lexical handle: the name is never
        # parsed for a mode, and the handle no longer shares its name
        # with $F.
        my $fh;
        unless (open $fh, '>', $F)
        {
            warn "cannot write $F: $!\n";
            next;
        }
        print {$fh} "<title>",$w,"</title>\n";
        print {$fh} $$word{$key}{$w}{text};
        close $fh or warn "cannot finish $F: $!\n";
        $out .= $F."\n";
    }
}
# Write the collected index entries, sorted by their key, to "<index>.xml"
# and append that file name to $out.  A failure to open the file leaves $out
# as it is and is reported on STDERR.  Finally the closing progress marker is
# printed and the newline-separated list of written files is returned.
$F = "$index.xml";
# Three-argument open with a lexical handle: the name is never parsed for a
# mode and the handle is closed with its scope even on an early exit.
if (open my $fh, '>', $F)
{
    for $w (sort keys %$idx)
    {
        print {$fh} $$idx{$w};
    }
    close $fh or warn "cannot finish $F: $!\n";
    $out .= $F."\n";
}
else
{
    warn "cannot write $F: $!\n";
}
print STDERR "</$index>\n";
return $out;
}
# NOTE(review): the `sub NAME {` lines that open the next two bodies are not
# visible in this view -- confirm against the complete file.
# Hands the caller's arguments plus DESC to xm::o::args_stdin and returns
# whatever that call yields.
return xm::o::args_stdin(@_,DESC); }
# Same call, but its result is passed on to DO and DO's result is returned.
return DO(xm::o::args_stdin(@_,DESC)); }
1;