package xm::pfe::flattenwordrefs;
use strict;
use xm::o;
use xm::sub;
use xm::pfe::cutfcodedocs;

sub DESC
{"
  look for a wordset-table, kill the C-syntax around it, and
  flatten the information. That is, prepend the latest _INTO-spec
  onto a word-export to from the XREF we can later dbjoin, and
  add another id for the wordset. The whole thing shall look a
  bit more like text than C after it is run through.
"}

sub coded
{
    return "ordinary primitive"    if $_[0] eq "P4_FXCO" or $_[0] eq "P4_FXco"
	or "CO" eq $_[0];
    return "immediate primitive"   if $_[0] eq "P4_IXCO" or $_[0] eq "P4_IXco"
	or "CI" eq $_[0];
    return "compiling primitive"   if $_[0] eq "P4_SXCO" or $_[0] eq "P4_SXco"
	or "CS" eq $_[0];
    return "constructor primitive" if $_[0] eq "P4_XXCO" or $_[0] eq "P4_XXco"
	or "CX" eq $_[0];
    return "defining primitive"	   if $_[0] eq "P4_RTCO" or $_[0] eq "P4_RTco";
    return "ordinary variable"     if $_[0] eq "P4_OVAR" or $_[0] eq "P4_OVaR"
	or "OV" eq $_[0];
    return "immediate variable"    if $_[0] eq "P4_IVAR" or $_[0] eq "P4_IVaR"
	or "IV" eq $_[0];
    return "ordinary valuevar"     if $_[0] eq "P4_OVAL" or $_[0] eq "P4_OVaL"
	or "OL" eq $_[0];
    return "immediate valuevar"    if $_[0] eq "P4_IVAL" or $_[0] eq "P4_IVaL"
	or "IL" eq $_[0];
    return "ordinary constant"     if $_[0] eq "P4_OCON" or $_[0] eq "P4_OCoN"
	or "OC" eq $_[0];
    return "immediate constant"    if $_[0] eq "P4_ICON" or $_[0] eq "P4_ICoN"
	or "IC" eq $_[0];
    return "threadstate variable"  if $_[0] eq "P4_DVAR" or $_[0] eq "P4_DVaR"
	or "DV" eq $_[0];
    return "threadstate valueGET"  if $_[0] eq "P4_DCON" or $_[0] eq "P4_DCoN"
	or "DC" eq $_[0];
    return "threadstate valueSET"  if $_[0] eq "P4_DSET" or $_[0] eq "P4_DSeT";

    return "forthword synonym"     if $_[0] eq "P4_FNYM";
    return "immediate synonym"     if $_[0] eq "P4_SNYM";
    return "obsolete forthword"    if $_[0] eq "P4_xOLD";
    return "obsolete immediate"    if $_[0] eq "P4_iOLD";
    return "exception declared"    if $_[0] eq "P4_EXPT";

    return "ordinary offsetval"    if $_[0] eq "P4_OFFS" or $_[0] eq "P4_OFFs";
    return "ordinary vocabulary"   if $_[0] eq "P4_OVOC";
    return "immediate vocabulary"  if $_[0] eq "P4_IVOC";
    return "loading slot id"       if $_[0] eq "P4_SLOT";
    return "loading slot size"     if $_[0] eq "P4_SSIZ";
    return "loading into"          if $_[0] eq "P4_INTO";
    return "loading wordset"       if $_[0] eq "P4_LOAD";
    return "loader code $_[0]";
}


sub per_ITEMWORDREF
{
    my ($in0,$in,$in2,$name,$intoref) = @_; 

    my $typewordref = ""; my $typeword = "";
    $in =~ s{ (<TYPEWORDREF(?:\s[^<>]*)?>) ((?:.(?!</TYPEWORDREF[\s>]))*.) 
		  (</TYPEWORDREF(?:\s[^<>]*)?>) }
    { $typewordref .= $1.$2.$3; $typeword = $2; "<:>" }gsex;

    my $cstrwordref = ""; my $cstrword = "";
    $in =~ s{ (<CSTRWORDREF(?:\s[^<>]*)?>) ((?:.(?!</CSTRWORDREF[\s>]))*.) 
		  (</CSTRWORDREF(?:\s[^<>]*)?>) }
    { $cstrwordref .= $1.$2.$3; $cstrword = $2; "" }gsex;

    my $linkwordref = ""; my $linkword = "";
    $in =~ s{ (<LINKWORDREF(?:\s[^<>]*)?>) ((?:.(?!</LINKWORDREF[\s>]))*.) 
		  (</LINKWORDREF(?:\s[^<>]*)?>) }
    { $linkwordref .= $1.$2.$3; $linkword = $2; "" }gsex;

    my $attrwordref = ""; 
    $in =~ s{ ^ ((?:.(?!<:>))*.) <:> } 
    { $attrwordref .= $1; "" }gsex;

    # decode the xml'ed CSTR
    my $refword = xm::sub::off3(xm::sub::C(xm::sub::on3($cstrword)));

    my $into = $$intoref;
    $into = "FORTH/DEF" if $$intoref eq "";
    $into = "" if $into =~ /\bFORTH\//;
    $into = "" if $into =~ /\bEXTENSIONS/; # FIXME: all EXT. should be chained
    $into .= " " if length $into;
    my $wordlist = $$intoref;
    $wordlist = "FORTH/DEF" if not length $$intoref;

    $typeword = coded($typeword);
    my $spc = " ";
    my $out = $in0.$attrwordref
	."<XREFWORDREF>".$into.$refword."</XREFWORDREF>".$spc
	."<MAKEWORDREF>".$spc
	."<INTOWORDLIST>".$wordlist."</INTOWORDLIST>".$spc
	."<FROMWORDSET>".$name."</FROMWORDSET>".$spc
	.$cstrwordref.$spc.$linkwordref.$spc
	."<TYPEWORDCODED>".$typeword."</TYPEWORDCODED>".$spc
	.$typewordref
	."</MAKEWORDREF>".$in2;
    
    # print STDERR "[",length $out,"]";
    return $out if (length $typeword == 2);
    
    if ($typeword =~ /loading into/)
    {
	$$intoref = $refword; # $cstrword;
	$$intoref = "FORTH/ANS" if $refword eq "[ANS]";
	$$intoref = "FORTH/BASE" if $refword eq "[FTH]";
	$$intoref = "FORTH/BASE" if $refword eq "FORTH";
	$$intoref = "FORTH/".$refword if length $linkword > 4;
	$in0 =~ s{(<) \w+} {$1."CDOCWORDREF"}sex;
	$in2 =~ s{(</)\w+} {$1."CDOCWORDREF"}sex;
	return $in0."<INTOWORDSET>".$refword."<INTOWORDSET>".$in2;
    }
	
    return $out;
}

sub per_LISTWORDREF
{
    my ($in,$name) = @_; 

    # embedded cdocs are kept, but renamed to CDOCWORDREF

    $in =~ s{ <CDOC(\s[^<>]*)?> ((?:.(?!</CDOC[\s>]))*.) 
		  </CDOC(\s[^<>]*)?> }
    { "<CDOCWORDREF".$1.">".xm::pfe::cutfcodedocs::format($2)
	  ."</CDOCWORDREF".$3.">" }gsex;
    
    # clean ITEMWORDREFs
    my $into = "";

    $in =~ s{ (<ITEMWORDREF(?:\s[^<>]*)?>) ((?:.(?!</ITEMWORDREF[\s>]))*.) 
		  (</ITEMWORDREF(?:\s[^<>]*)?>) }
    { per_ITEMWORDREF($1,$2,$3,$name,\$into) }gsex;

    print STDERR $name," ";
    # print STDERR " <wordset:",$name," ", length $in, ">\n";
    return $in;
}

sub per_ITEMWORDSET
{
    my $in = shift;
    my $out = ""; 

    my $cdocwordset = "";
    $in =~ s{ (<CDOCWORDSET(?:\s[^<>]*)?>) 
		  ((?:.(?!</CDOCWORDSET[\s>]))*.) 
		  (</CDOCWORDSET(?:\s[^<>]*)?>) }
    { $cdocwordset .= $1.$2.$3; "" }gsex;
    my $namewordset = ""; my $wordset;
    $in =~ s{ (<NAMEWORDSET(?:\s[^<>]*)?>) ((?:.(?!</NAMEWORDSET[\s>]))*.) 
		  (</NAMEWORDSET(?:\s[^<>]*)?>) }
    { $namewordset .= $1.$2.$3; $wordset = $2; "" }gsex;
    my $cstrwordcnt = "";
    $in =~ s{ <(CSTRWORDCNT)(\s[^<>]*)?(?=>) ((?:.(?!</CSTRWORDCNT[\s>]))*.) 
		  </CSTRWORDCNT(\s[^<>]*)?> } 
    { $cstrwordcnt .= "<TITLEWORDSET".$2.$3."</TITLEWORDSET".$4.">"; "" }gsex;
    $in =~ s{ (<LISTWORDREF(?:\s[^<>]*)?>) ((?:.(?!</LISTWORDREF[\s>]))*.) 
		  (</LISTWORDREF(?:\s[^<>]*)?>) } 
    {  $out .= $cstrwordcnt.$namewordset.$cdocwordset
	   .$1.per_LISTWORDREF($2,$wordset).$3; "" }gsex;

    return $out;
}

sub DO
{
    my $in = shift;
    my $out = "";
    my $comment;

    print STDERR "<wordsets> ";
    $in =~ s{ (<ITEMWORDSET(?:\s[^<>]*)?>)  
		  ((?:.(?!</?ITEMWORDSET[\s>]))*.) 
		  (</ITEMWORDSET(?:\s[^<>]*)?>)
		  }
    { $1.per_ITEMWORDSET($2).$3 }gsex;
    print STDERR "</wordsets>\n";

    return $in;
}

sub ARGS { return    xm::o::args_stdin(@_,DESC); }
sub main { return DO(xm::o::args_stdin(@_,DESC)); }

1;