pp_maybe
sub
package xm::pp;
use strict;
use xm::sub;
use xm::o;

sub DESC {"
  this is a preprocessor that works on some input containing
  xm definitions.

  (More Documentation Should Be Here, But Is Not. Contact
   The Author. It is guidod\@gmx.de, now go and blame him).
"}

# --------------- helpers -------------

my @enter_level; # = ( " ","pr-level-error" );

sub pr
{
    my $flag = shift;
    my @all = @_;
    my $flag2 = $flag; $flag2 =~ s{(\w+)\-.*}{$1};
    if (exists $o{$flag} or exists $o{$flag2}) 
    { 	print STDERR "\n"," " x $#enter_level;
	print STDERR "<",join(" ",@all),">"; 
    }
    return 1; 
}

sub pr_hash
{
    my $flag = shift;
    my $hash = shift;
    my @all = @_;
    my $flag2 = $flag; $flag2 =~ s{(\w+)\-.*}{$1};
    if (exists $o{$flag} or exists $o{$flag2})
    { 	print STDERR "\n"," " x $#enter_level;
	print STDERR "<",join(" ",@all),">"; 
	my $k;
	for $k (sort keys %$hash)
	{
	    print STDERR "\n", " " x ( $#enter_level + 1);
	    print STDERR $k, " => ", $$hash{$k};
	}
     	print STDERR "\n"," " x $#enter_level;
	print STDERR "</",$all[0],">"; 
    }
}

sub pr_enter
{
    if (pr @_)
    {
	push @enter_level, $_[0];
	push @enter_level, $_[1];
    }
}

sub pr_leave
{
   my $text = pop @enter_level;
   my $flag = pop @enter_level;
   if (exists $o{$flag}) 
	{ 	print STDERR "\n"," " x $#enter_level;
	  	print STDERR "</",$text,">"; 
		return 1; 
	}
   $flag =~ s{(\w+)\-.*}{$1};
   if (exists $o{$flag}) 
	{ 	print STDERR "\n"," " x $#enter_level;
	  	print STDERR "</",$text,">"; 
		return 1; 
	}
   return 0;
}

# end of for the given sequence of xm-tags
sub e
{
    my $mA = shift;
    my $mE = "";
    $mA =~ s{ <([^<>\s]+)[^<>]*> } { $mE = "</".$1.">".$mE; $& }gsex;
    return $mE;
}

# -------------- runstages ------------

sub runstagedone
{
    my $in = $_[0];
    $in =~ s/<:(\/?\w)/<$1/gs;
    return $in;
}

# walk direct pairs of xm-markups and feed its function the
# the body just verbatim. Paste back the content and rerun
# until no verbatim replacements are done anymore. Upper limit
# is 100 recursions.
sub runstage2
{
    my ($in,$stage) = @_;
    my $max = 100;
    my ($tag,$att,$txt,$end,$res);

    pr_enter "debug-stages","runstage2",$stage;

    while (--$max and $in =~
    s{ <(\w[^\s<>]*)(\s[^<>]*)?(?=>) ((?:.(?!</?\1[\s>]))*.) </\1(\s[^<>]*)?> }
    { 
	$tag = $1;
	$att = xm::o::xx $2;
	$txt = poch($3);
	$end = xm::o::xx $4;
	$res = "<:".$tag.$att.">".$txt."<:/".$tag.$end.">";

	pr "debugging", "tag", $tag;
	if (exists $xm{pp}{$stage}{$tag}{run})
	{
	    my $f = $xm{pp}{$stage}{$tag}{run};
	    if (exists $xm{pp}{$stage}{$tag}{arg})
	    {
		$res = &$f($txt,$xm{pp}{$stage}{$tag}{arg},$tag,$att.$end);
	    }else{
		$res = &$f($txt,$stage,$tag,$att.$end);
	    }
	}elsif (exists $xm{pp}{$stage}{$tag}{arg}) {
	    # the default for arg w/o run: eval it as perl on $txt as $_
	    local $_ = $txt;
	    eval $xm{pp}{$stage}{$tag}{arg};
	    $res = $_;
	};

	$res
    }gsex) {};
    $in =~ s{^\s*}{}s;

    pr_leave;
    return runstagedone($in);
}

# instead of txt it runs on tag, but tag may also start/end with ? or !
sub runstage1
{
    my ($in,$stage) = @_;
    my $max = 100;
    my ($tag,$att,$end,$res);

    pr_enter "debug-stages","runstage1",$stage;

    while (--$max and $in =~
    s{ <([\?]?\w[^\s<>]*)(\s[^<>]*)?([\/\?]>) } 
    { 
	$tag = $1;
	$att = xm::o::xx $2;
	$end = $3;
	$res = "<:".$tag.$att.$end;

	pr "debugging", "tag", $tag;
	if (exists $xm{pp}{$stage}{$tag}{run})
	{
	    my $f = $xm{pp}{$stage}{$tag}{run};
	    if (exists $xm{pp}{$stage}{$tag}{arg})
	    {
		$res = &$f($tag.$att,$xm{pp}{$stage}{$tag}{arg},$tag,$att);
	    }else{
		$res = &$f($tag.$att,$stage,$tag,$att);
	    }
	}

	$res
    }gsex) {};
    $in =~ s{^\s*}{}s;

    pr_leave;
    return runstagedone($in);
}

# unlike runstage0, apply also to tags that are not proper xml-singular
# in this case however, their attribute facility is crippled. 
sub runstage0
{
    my ($in,$stage) = @_;
    my $max = 100;
    my ($tag,$att,$end,$res);

    pr_enter "debug-stages","runstage0",$stage;

    while (--$max and $in =~
    s{ <(\w[^\s<>]*)(\s[^<>]*)?(>) } 
    { 
	$tag = $1;
	$att = xm::o::xx $2;
	$end = $3;
	$res = "<:".$tag.$att.$end;

	pr "debugging", "tag", $tag;
	if (exists $xm{pp}{$stage}{$tag}{run})
	{
	    my $f = $xm{pp}{$stage}{$tag}{run};
	    if (exists $xm{pp}{$stage}{$tag}{arg})
	    {
		$res = &$f($tag.$att,$xm{pp}{$stage}{$tag}{arg},$tag,$att);
	    }else{
		$res = &$f($tag.$att,$stage,$tag,$att);
	    }
	}

	$res
    }gsex) {};
    $in =~ s{^\s*}{}s;

    pr_leave;
    return runstagedone($in);
}

# actually a runstage1 but only for entity-tags. That good for doing
# the inclusions. The $att is used in the place of $txt.
sub runstage_
{
    my ($in,$stage) = @_;
    my $max = 100;
    my ($tag,$att,$end,$res);

    pr_enter "debug-stages","runstage_",$stage;

    while (--$max and $in =~
    s{ <([\!]\w[^\s<>]*)(\s[^<>]*)?(>) } 
    { 
	$tag = $1;
	$att = xm::o::xx $2;
	$end = $3;
	$res = "<:".xm::sub::off3($tag.$att).$end;

	pr "debugging", "tag", $tag;
	if (exists $xm{pp}{$stage}{$tag}{run})
	{
	    my $f = $xm{pp}{$stage}{$tag}{run};
	    if (exists $xm{pp}{$stage}{$tag}{arg})
	    {
		$res = &$f($att,$xm{pp}{$stage}{$tag}{arg},$tag,$att);
	    }else{
		$res = &$f($att,$stage,$tag,$att);
	    }
	}

	$res
    }gsex) {};
    $in =~ s{^\s*}{}s;

    pr_leave;
    return runstagedone($in);
}


# -------------- variables ------------

# so... how does it work... basically, right under the $xm{pp} key
# are the various stages that are executed in `reverse sort` order,
# and under each stage live the tags to be watched for, and under
# each tag we can find the "run" entry that will execute with that
# tag. 
# visually:
#    $xm{pp}{$stage}{$tag}{run} = \&func
#    $xm{pp}{$stage}{$tag}{arg} = $funcarg
#    $xm{pp}{$stage}{""} = \&runstage
# Actually, the &runstage function will specify the actual
# arguments to be given to \&func, including the specification
# where $funcarg will pop up in the func's @_, and what to do if
# there is no ${run} but an ${arg} present. Even the tag/run/arg
# layout could be different. You wouldn't want to do it actually,
# since in this script we make sure that everything has a sane default.

sub poch # unlike chop, it chops the first character, not the last one
{
    my $v = shift;
    $v =~ s/^.//;
    return $v;
}

# the main-function will run the stages. The default &runstage is
# called 
sub DO
{
    my $in = shift;
    pr_enter "debug-stages","xm::DO",@_;
    my $stage;

    $in = &runstage_($in,"---");

    for $stage (reverse sort keys %{$xm{pp}})
    {
	my $runstage;
	if (exists $xm{pp}{$stage}{""})
	{
	    $runstage = $xm{pp}{$stage}{""}
	}elsif ($stage =~ /^\w+0$/) {
	    $runstage = \&runstage0;
	}elsif ($stage =~ /^\w+1$/) {
	    $runstage = \&runstage1;
	}elsif ($stage =~ /^\w+2$/) {
	    $runstage = \&runstage2;
	}else{
	    next; # skip this stage
	}

	$in = &$runstage($in, $stage);
    }
    pr_leave;
    return $in;
}

sub ARGS { return    xm::o::args_stdin(@_, "xm-preprocessor"); }
sub main { return DO(xm::o::args_stdin(@_, "xm-preprocessor")); }

# ------------------------------------------------------------------- #

# $xm{pp}{verbs3} = { }; # which symbols should be called with verbatim body
# $xm{pp}{verbs2} = { }; # which symbols should be called with verbatim body
# $xm{pp}{verbs1} = { }; # which symbols to be replaced before other work
# $xm{pp}{calls3} = { };
# $xm{pp}{calls2} = { };
# $xm{pp}{calls1} = { };
# $xm{pp}{tags1} = { };
# $xm{pp}{tags2} = { };
# $xm{pp}{ents1} = { };

# run/verb -> tag/verb/call
# run/mark -> tag/text/call
# tag/verb -> tag/verb/mark
# tag/mark -> tag/text/mark

# registering 
# $xm{pp}->{ tag1/tag2/type }->{ markup }->{ verb/text/stage }->{ exec }
#         either $xm{pp}{tag1}{$nam}{$run} = { call => \&func };
#         ..or.. $xm{pp}{tag2}{$nam}{$run}{mark} = "<this>";
sub verb1 { $xm{pp}{verbs1}{arg}{$_[0]} = $_[1]; }
sub verb2 { $xm{pp}{verbs2}{arg}{$_[0]} = $_[1]; }
sub verb3 { $xm{pp}{verbs3}{arg}{$_[0]} = $_[1]; }
sub call1 { $xm{pp}{calls1}{arg}{$_[0]} = $_[1]; }
sub call2 { $xm{pp}{calls2}{arg}{$_[0]} = $_[1]; }
sub call3 { $xm{pp}{calls3}{arg}{$_[0]} = $_[1]; }
sub tag1  { $xm{pp}{tags1}{arg}{$_[0]} = $_[1]; }
sub tag2  { $xm{pp}{tags2}{arg}{$_[0]} = $_[1]; }
sub ent1  { $xm{pp}{ents1}{arg}{$_[0]} = $_[1]; }

sub mark2          # replaces the delimiters with arg
{
    my ($txt,$arg,$tag,$att) = @_;
    if (length $arg) { $arg =~ s/ \Q \$ \( \. \) \E / $att /sex; }
    return $arg.$_[0].xm::pp::e($arg);
}

sub mark1          # replace the markup arg
{
    my ($txt,$arg,$tag,$att) = @_;
    if (length $arg) { $arg =~ s/ \Q \$ \( \. \) \E / $att /sex; }
    return $arg;
}

sub call2          # call the body $with, keep the outer markups
{
    my ($txt,$arg,$tag,$att) = @_;
    return "<:".$tag.$att.">".&$arg(@_)."<:".$tag.">";
}

sub call1          # just all the args $with
{
    my ($txt,$arg,$tag,$att) = @_;
    return &{$arg}(@_);
}

# -------------------------------------------------------------- #
# .................... basic implementations ................... #

sub use_tag2
{
    my $tag = "";
    my $def = shift;
    $def =~ s{ ^[^<>]*<(\w[^\s<>]*)> } { $tag = $1; "" }gsex;
    if (length $tag) { 
	$def =~ s/\s*$//;
	$xm{pp}{tag2}{$tag}{arg} = $def; 
	$xm{pp}{tag2}{$tag}{run} = \&mark2;
    }
    pr "debug-use","use:tag2",$tag,$def;
    return "";
}
$xm{pp}{tag2}{"..."} = "...";
$xm{pp}{verb2}{"use:tag2"}{run} = \&use_tag2;

sub use_tag1
{
    my $tag = "";
    my $def = shift;
    $def =~ s{ ^[^<>]*<(\w[^\s<>]*)> } { $tag = $1; "" }gsex;
    if (length $tag) { 
	$def =~ s/\s*$//;
	$xm{pp}{tag1}{$tag}{arg} = $def; 
	$xm{pp}{tag1}{$tag}{run} = \&mark1;
    }
    pr "debug-use","use:tag1",$tag,$def;
    return "";
}
$xm{pp}{tag1}{"..."} = "...";
$xm{pp}{verb2}{"use:tag1"}{run} = \&use_tag1;

sub use_ent1
{
    my $tag = "";
    my $def = shift; 
    $def =~ s{ ^[^<>]*<(\w[^\s<>]*)> } { $tag = $1; "" }gsex;
    if (length $tag) { 
	$def =~ s/\s*$//;
	$xm{pp}{ent0}{$tag}{arg} = $def; 
	$xm{pp}{ent0}{$tag}{run} = \&mark1;
    }
    pr "debug-use","use:ent1",$tag,$def;
    return "";
}
$xm{pp}{ent0}{"..."} = "...";
$xm{pp}{verb2}{"use:ent1"}{run} = \&use_ent1;

$xm{pp}{ent0}{"lt"}{arg} = "&lt;";
$xm{pp}{ent0}{"lt"}{run} = \&mark1;
$xm{pp}{ent0}{"gt"}{arg} = "&gt;";
$xm{pp}{ent0}{"gt"}{run} = \&mark1;
$xm{pp}{ent0}{"amp"}{arg} = "&amp;";
$xm{pp}{ent0}{"amp"}{run} = \&mark1;
$xm{pp}{ent0}{"and"}{arg} = "&and;";
$xm{pp}{ent0}{"and"}{run} = \&mark1;
$xm{pp}{ent0}{"qq"}{arg} = "&quot;";
$xm{pp}{ent0}{"qq"}{run} = \&mark1;
$xm{pp}{ent0}{"quot"}{arg} = "&quot;";
$xm{pp}{ent0}{"quot"}{run} = \&mark1;


sub use_log
{
   print STDERR $_[0],"\n";
   return "";
}
$xm{pp}{verb2}{"use:log"}{run} = \&use_log;
$xm{pp}{verb2}{"!log"}{run} = \&use_log;

sub use_defs
{
    my $F = $_[0];
    my $done = 0;

    $F =~ s/^\s*//; $F =~ s/\s*$//;

    if (length $F and not exists $xm{pp}{used}{$F} and
        -f $F and open F, "<$F")
    {
	my $T = join("",<F>);
        close F;
	$xm{pp}{used}{$F} = 1;
        DO($T,"$F"); # discard the return value
        $done = 1;
    }
    else
    {
       my $p;
       for $p (@INC)
       {
	   $done = 1 if exists $xm{pp}{used}{"$p/$F.pm"};
           if (not exists $xm{pp}{used}{"$p/$F.pm"} and
               -f "$p/$F.pm" and open F, "<$p/$F.pm")
           {
               close F;
	       $xm{pp}{used}{"$p/$F.pm"} = 1;
 	       pr "debug-use","and:use","loading","$p/$F.pm";
	       require "$F.pm";	
	       $done = 1;
           }
	   $done = 1 if exists $xm{pp}{used}{"$p/$F.xm"};
           if (not exists $xm{pp}{used}{"$p/$F.xm"} and
               -f "$p/$F.xm" and open F, "<$p/$F.xm")
           {
	       my $T = join("",<F>);
               close F;
	       $xm{pp}{used}{"$p/$F.xm"} = 1;
 	       pr "debug-use", "and:use","loading","$p/$F.xm";
               DO($T,"$p/$F.xm"); # discard the return value
	       $done = 1;
           }
	   last if $done;
       }
    }
    pr "debug-use","and:use","notfound",$F if $done == 0;
    pr "debug-use","inc:path=",join(" ",@INC) if $done == 0;
    return "<!-- use $F #ignored -->" if $done == 0;
    return "";
}
$xm{pp}{"---"}{"!use"}{run} = \&use_defs;
$xm{pp}{verb2}{"use:defs"}{run} = \&use_defs;

sub use_doctype 
{
    my $k = $_[0];
    my $F;
    my $doctypes="doctype/";
    my $loaded="";

    pr "debug-doctype","doctype:",$k;
    for $F (split(' ',$k))
    {
	$F =~ s/\s*//g;
	next if $F =~ /^(-|SYSTEM|system)$/;
	pr "debug-doctype","doctype=",$F;

	if ($F =~ /\/$/) { 
	    $doctypes = "../../../".$F if -d "../../../".$F;
	    $doctypes = "../../".$F if -d "../../".$F;
	    $doctypes = "../".$F if -d "../".$F;
	    $doctypes = $F if -d $F;
	    next;	
	}

	if (-f $F.".pm" or -f $F.".xm")
	{
	    $loaded=$loaded." ".$F;
	    use_defs ($F);
	}elsif (-f $doctypes.$F.".pm" or -f $doctypes.$F.".xm")
	{
	    $loaded=$loaded." ".$doctypes.$F;
	    use_defs ($F);
	}else{
	    $loaded=$loaded." "."xm/doctype/".$F;
	    use_defs ("xm/doctype/".$F);
	}
    }
    return "<meta> doctypes: $loaded </meta>";
}
$xm{pp}{"---"}{"!doctype"}{run} = \&use_doctype;