package xm::pfe::grabwordset;
use strict;
use xm::o;

sub DESC
{"
  will look for those macro-pairs that form an WORDSET-export table
  to be used on pfe module load. Mark them.
"}

# NOTE: we add the CSTRWORDREF here but the C string is NOT decoded, BUT xml'd

sub do_block_2item
{
    if ($_[0] eq "CO" or $_[0] eq "P4_FXco" or $_[0] eq "P4_FXCO" or
	$_[0] eq "CI" or $_[0] eq "P4_IXco" or $_[0] eq "P4_IXCO" or
	$_[0] eq "CS" or $_[0] eq "P4_SXco" or $_[0] eq "P4_SXCO" or
	$_[0] eq "RT" or $_[0] eq "P4_RTco" or $_[0] eq "P4_RTCO" or
	$_[0] eq "CX" or $_[0] eq "P4_XXco" or $_[0] eq "P4_XXCO")
    {
	return
	    "<TYPEWORDREF>"
		.$_[0]."</TYPEWORDREF>".$_[1]
		    ."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] 
			."<LINKWORDREF to=\"".$_[4]."-fcode\" >"
				.$_[4]."</LINKWORDREF to >".$_[5] ;
    }else{
	return
	    "<TYPEWORDREF>".$_[0]."</TYPEWORDREF>".$_[1]
		."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] 
		    ."<LINKWORDREF>".$_[4]."</LINKWORDREF>".$_[5] ;
    }
}

sub do_block_1item
{
    return
	"<TYPEWORDREF>".$_[0]."</TYPEWORDREF>".$_[1]
	    ."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] ;
}

sub do_block_list
{
    my $in = shift;
    my $ws = shift;
    my $n = 0;
    
    $in =~ s{ (\b\w+\b) (\s*[(]\s*) 
                  <(CSTR:?\w*(?:\s[^<>]*)?)> ((?:.(?!</?CSTR:?\w*[\s>]))*.) 
		      </CSTR:?\w*(?:\s[^<>]*)?> 
			  (\s*[)]\s*(?:\,|$ ))  } 
    { # replacing $3
	++$n;
	"<ITEMWORDREF id=\"".$ws.$n."\" >"
	    .do_block_1item($1,$2,$4,$5)
		."</ITEMWORDREF>"
		}gsex;
    
    $in =~ s{ (\b\w+\b) (\s*[(]\s*) 
                  <(CSTR:?\w*(?:\s[^<>]*)?)> ((?:.(?!</?CSTR:?\w*[\s>]))*.) 
		      </CSTR:?\w*(?:\s[^<>]*)?> 
			  (\s*\,\s*)
			      ([^\(\)\{\}\s]+ (?: \( [^\(\)\{\}]+ \) )?
			       | (?: <CSTR:?\w*(?:\s[^<>]*)?> 
				  (?:.(?!</?CSTR:?\w*[\s>]))*.? 
				  </CSTR:?\w*(?:\s[^<>]*)?> )
			       | (?: <CCHR:?\w*(?:\s[^<>]*)?> 
				  (?:.(?!</?CCHR:?\w*[\s>]))*.? 
				  </CCHR:?\w*(?:\s[^<>]*)?> )
			       )
				  (\s*[)]\s*(?:\,|$ ))  } 
    { # replacing $3
	++$n;
	"<ITEMWORDREF id=\"".$ws.$n."\" >"
	    .do_block_2item($1,$2,$4,$5,$6,$7)
		."</ITEMWORDREF>"
		}gsex; 
    
    $in =~ s{(</ITEMWORDREF>)([\ \t]*<CDOC>[^<>]*</CDOC>)}{$2$1}gm;
    
    return "<LISTWORDREF>".$in."</LISTWORDREF>";
} 

sub do_block
{
    my $in = shift;
    my $ws = shift;
    
    my $head = "";
    my $tail = "";
    $in =~ s{(^ \s* [\{] )} {$head = $1; ""}sex;
    $in =~ s{( [\}] \s* $)} {$tail = $1; ""}sex;
    
    return $head.do_block_list($in,$ws.".ext.").$tail;
} 

sub DO
{
    my $in = shift;

    # non-CDEFS style    
    $in =~ s{ (\b(?:P4_)?LISTWORDS) 
		  (\s*[(]\s*) (\w+) (\s*[)]\s*[=]\s*)
		  <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> (\s*[;]\s*)
              (\b(?:P4_)?COUNTWORDS) (\s*[(]\s*) 
                  (\w+) (\s*[,]\s*)
                  <(CSTR:?\w*)> ((?:.(?!</?CSTR:?\w*>))*.) </CSTR:?\w*> 
		  (\s*[)]\s*[;]) } 
            { print STDERR "<wordset $3>";
              "<ITEMWORDSET>"
              .$1.$2
              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
              ."<CBLKWORDSET>".do_block($6,$3)."</CBLKWORDSET>".$7 
		  # \5 replaced
              .$8.$9
              ."<NAMEWORDCNT>".$10."</NAMEWORDCNT>".$11    
		  # NAMEWORDSET == NAMEWORDCNT ?? -> !!
              ."<CSTRWORDCNT>".$13."</CSTRWORDCNT>".$14    
		  # \12 replaced
              ."</ITEMWORDSET>" }gsex;

#    # old-CDEFS style    
#    $in =~ s{ (<CDCL>\b(?:P4_)?LISTWORDS) (\s*\(\s*) (\w+) ((?:.(?!</?CDCL>))*.? </CDCL> \s*)
#        	 <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> </ITEMCDEF> (\s*\;\s*)
#                 (\b(?:P4_)?COUNTWORDS) (\s*\(\s*) 
#                 (\w+) (\s*\,\s*)
#                 <(CSTR:?\w*)> ((?:.(?!</?CSTR:?\w*>))*.) </CSTR:?\w*> (\s*\)\s*\;) } 
#            { print STDERR "<wordset $3>";
#              "<ITEMWORDSET>"
#              .$1.$2
#              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
#              ."<CBLKWORDSET>".do_block($6)."</CBLKWORDSET>".$7 # \5 replaced
#              .$8.$9
#              ."<NAMEWORDCNT>".$10."</NAMEWORDCNT>".$11    # NAMEWORDSET == NAMEWORDCNT ?? -> !!
#              ."<CSTRWORDCNT>".$13."</CSTRWORDCNT>".$14    # \12 replaced
#              ."</ITEMWORDSET></ITEMCDEF>" }gsex;

    # new-CDEFS style    
    $in =~ s{ (<CDCL>\b(?:P4_)?LISTWORDS) 
		  (\s*[(]\s*) (\w+) ((?:.(?!</?CDCL>))*.? </CDCL> \s*)
        	  <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> 
		  (\s*[;]?\s*) </ITEMCDEF> (\s*[;]?\s*)
              (\b(?:P4_)?COUNTWORDS) (\s*[(]\s*) 
                 (\w+) (\s*[,]\s*)
                 <(CSTR:?\w*)> ((?:.(?!</?CSTR:?\w*>))*.) </CSTR:?\w*> 
		 (\s*[)]\s*[;]) } 
            { print STDERR "<wordset $3>";
              "<ITEMWORDSET>"
              .$1.$2
              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
              ."<CBLKWORDSET>".do_block($6,$3)."</CBLKWORDSET>".$7 
		  # \5 replaced
              .$8.$9.$10
              ."<NAMEWORDCNT>".$11."</NAMEWORDCNT>".$12    
		  # NAMEWORDSET == NAMEWORDCNT ?? -> !!
              ."<CSTRWORDCNT>".$14."</CSTRWORDCNT>".$15    
		  # \13 replaced
              ."</ITEMWORDSET></ITEMCDEF>" }gsex;
    
    return $in;
} 

sub ARGS { return    xm::o::args_stdin(@_,DESC); }
sub main { return DO(xm::o::args_stdin(@_,DESC)); }

1;