package xm::cread;
use strict;
use xm::o;
"
  take the plain C source code and convert it into xml'ish, which would be 
  almost like calling xm::sub.off on the complete source text. 
  (xm::sub.off replaces  the special-chars [\&<>\"] with their entity-refs). 
 
 Yet this would lose just too much information; instead this subroutine 
  marks the C tokens as
    CSTR = c-string (\"...\")
    CCHR = c-char   ('...')
    CDOC = c-comment (/*...*/ and //...)
  and the preprocessor lines are special too, so there is
    CPRE = c-preprocessor line (#...)
  while still making sure to mark CSTR/CCHR/CDOCs inside CPRE which is the 
  way newer cpp(1) is supposed to work.
  much of the later scanner-modules may call xm::sub.on on the enclosed text
  to scan in a more intelligible way (understood by C programmers).
"}
    # Mark up C source text as xml'ish text.
    #
    # The markup characters & < > are first turned into entity references.
    # Then comments, character literals, string literals and preprocessor
    # lines are wrapped in <TAG>...</TAG> pairs (the same tokens are also
    # marked inside a preprocessor line).  Finally every double quote is
    # turned into its entity reference and whitespace directly after an
    # opening preprocessor tag is moved in front of that tag.
    #
    # Arguments (positional):
    #   $in   - the C source text
    #   $cdoc - tag name for comments              (default "CDOC")
    #   $cchr - tag name for character literals    (default "CCHR")
    #   $cstr - tag name for string literals       (default "CSTR")
    #   $cpre - tag name for preprocessor lines    (default "CPRE")
    #   $cerr - tag name emitted as an empty element when a match has no
    #           defined capture                    (default "CERR")
    # A tag name that is undefined or empty falls back to its default.
    #
    # Returns the marked-up text.
    my ($in,$cdoc,$cchr,$cstr,$cpre,$cerr) = @_;
    $cdoc = "CDOC" if not defined $cdoc or not length $cdoc;
    $cchr = "CCHR" if not defined $cchr or not length $cchr;
    $cstr = "CSTR" if not defined $cstr or not length $cstr;
    $cpre = "CPRE" if not defined $cpre or not length $cpre;
    $cerr = "CERR" if not defined $cerr or not length $cerr;

    # Escape the markup characters before any tag is inserted, so the tags
    # added below are the only real markup in the result.  The ampersand has
    # to go first, otherwise the "&" of the other entities would be escaped
    # a second time.
    $in =~ s{&}{&amp;}g;
    $in =~ s{<}{&lt;}g;
    $in =~ s{>}{&gt;}g;

    # Tokens recognised at top level as well as inside a preprocessor line.
    # Capture 1 is a comment without its leading slash (either the rest of
    # the line or a non-greedy block comment), capture 2 a character
    # literal, capture 3 a string literal.
    my $ctoken = qr{
          / (  / .*
            |  \* [\s\S]*? \*/ )
        | ( '(?:[^\\\']|\\.)+' )
        | ( "(?:[^\\\"]|\\.)*" )
    }x;

    # Wrap a piece of text into an opening/closing tag pair.
    my $wrap = sub {
        my ($tag, $text) = @_;
        return "<$tag>$text</$tag>";
    };

    # Marks comments, character literals and strings inside the text of a
    # single preprocessor directive.
    my $cpp = sub {
        my $text = shift;
        $text =~ s{ $ctoken }{
              defined($1) ? $wrap->($cdoc, "/".$1)
            : defined($2) ? $wrap->($cchr, $2)
            : defined($3) ? $wrap->($cstr, $3)
            :               $wrap->($cerr, "")
        }xgem;
        return $text;
    };

    # Capture 4 is a preprocessor directive: optional leading whitespace, a
    # "#", the rest of the line, plus every following line for as long as
    # the line before it ends in a backslash (checked with a look-behind,
    # because the greedy ".*" has already consumed that backslash).
    $in =~ s{ $ctoken | ( ^ \s* \# .* (?: (?<= \\ ) \n .* )* ) }{
          defined($1) ? $wrap->($cdoc, "/".$1)
        : defined($4) ? $wrap->($cpre, $cpp->($4))
        : defined($2) ? $wrap->($cchr, $2)
        : defined($3) ? $wrap->($cstr, $3)
        :               $wrap->($cerr, "")
    }xgem;

    # Quotes can only be escaped now: the string pattern above needs to see
    # the raw double quotes.
    $in =~ s{"}{&quot;}g;

    # "\s*" in the directive pattern may have swallowed blank lines and
    # indentation; move that whitespace in front of the opening tag.
    $in =~ s{(<\Q$cpre\E>)(\s+)}{$2$1}g;
    return $in;
}
 # NOTE(review): the `sub ... {` opener belonging to this closing brace is not
 # visible in this file as given - confirm against the upstream source.
 # Hands the caller's arguments together with DESC to xm::o::args_stdin and
 # returns its result unchanged.
 return    xm::o::args_stdin(@_, DESC); }
 # Same call as above, but the result is passed through DO before returning.
 # NOTE(review): the opener for this closing brace is not visible either.
 return DO(xm::o::args_stdin(@_, DESC)); }
1;