package xm::cread;
use strict;
use xm::o;
# DESC() - return the free-text description of this module.
#
# Takes no arguments and returns one multi-line string. Within this file the
# result is handed to xm::o::args_stdin as its last argument.
# The text is runtime data, so its wording is reproduced exactly.
sub DESC {
"
take the plane C source code and convert it into xml'ish, which would be
almost like calling xm::sub.off on the complete source text.
(xm::sub.off replaces the special-chars [\&<>\"] with their entity-refs).
Yet this would loose just too much information, instead this subroutine
marks the C tokens as
CSTR = c-string (\"...\")
CCHR = c-char ('...')
CDOC = c-comment (/*...*/ and //...)
and the preprocessor lines are special too, so there is
CPRE = c-preprocessor line (#...)
while still making sure to mark CSTR/CCHR/CDOCs inside CPRE which is the
way newer cpp(1) is supposed to work.
much of the later scanner-modules may call xm::sub.on on the enclosed text
to scan in a more intelligible way (understood by C programmers).
"}
# DO($in, $cdoc, $cchr, $cstr, $cpre, $cerr) - mark up plain C source text.
#
# Parameters:
#   $in    the C source text to convert
#   $cdoc  tag name for comments  (/*...*/ and //...), default "CDOC"
#   $cchr  tag name for character literals ('...'),    default "CCHR"
#   $cstr  tag name for string literals ("..."),       default "CSTR"
#   $cpre  tag name for preprocessor lines (#...),     default "CPRE"
#   $cerr  tag name for the fallback case,             default "CERR"
# An undefined or empty tag name selects the default.
#
# Returns the converted text: the characters & < > " are replaced by their
# entity references, and every comment, character literal, string literal
# and preprocessor line is wrapped in <TAG>...</TAG>. Comments and literals
# found inside a preprocessor line are tagged as well, nested in the CPRE
# element.
#
# Limitation: a preprocessor line ends at the first newline that is not
# preceded by a backslash, so a /*...*/ comment that starts on such a line
# and ends on a later one is not kept together.
sub DO {
    my ($in, $cdoc, $cchr, $cstr, $cpre, $cerr) = @_;

    $cdoc = "CDOC" if not defined $cdoc or not length $cdoc;
    $cchr = "CCHR" if not defined $cchr or not length $cchr;
    $cstr = "CSTR" if not defined $cstr or not length $cstr;
    $cpre = "CPRE" if not defined $cpre or not length $cpre;
    $cerr = "CERR" if not defined $cerr or not length $cerr;

    # Escape the markup characters first, so that the only literal '<' and
    # '>' left in the result belong to the tags inserted below. '&' has to
    # go first or it would re-escape the entities produced for '<' and '>'.
    $in =~ s{&}{&amp;}g;
    $in =~ s{<}{&lt;}g;
    $in =~ s{>}{&gt;}g;

    # Token patterns. '.' deliberately does not match a newline here, so a
    # "//" comment stops at the end of its line.
    my $comment = qr{ / (?: / .* | \* [\s\S]*? \*/ ) }x;
    my $char    = qr{ ' (?: [^\\'] | \\. )+ ' }x;
    my $string  = qr{ " (?: [^\\"] | \\. )* " }x;
    # A '#' line including backslash-newline continuations. The leading \s*
    # may swallow blank lines; that whitespace is moved out of the element
    # again at the end of this subroutine.
    my $preproc = qr{ ^ \s* \# (?: \\ \n | [^\n] )* }xm;

    # Tags comments and literals inside the text of one preprocessor line.
    my $mark_tokens = sub {
        my $text = shift;
        $text =~ s{ ($comment) | ($char) | ($string) }{
            my ($doc, $chr, $str) = ($1, $2, $3);
              defined $doc ? "<$cdoc>" . $doc . "</$cdoc>"
            : defined $chr ? "<$cchr>" . $chr . "</$cchr>"
            : defined $str ? "<$cstr>" . $str . "</$cstr>"
            :                "<$cerr></$cerr>"
        }xge;
        return $text;
    };

    # One left-to-right pass over the whole text: whichever token starts
    # first wins, which keeps quotes inside comments (and comment markers
    # inside strings) from being misread. The $cerr branch is a safety net;
    # every alternative is captured, so one group is always defined.
    $in =~ s{ ($comment) | ($char) | ($string) | ($preproc) }{
        my ($doc, $chr, $str, $pre) = ($1, $2, $3, $4);
          defined $doc ? "<$cdoc>" . $doc . "</$cdoc>"
        : defined $pre ? "<$cpre>" . $mark_tokens->($pre) . "</$cpre>"
        : defined $chr ? "<$cchr>" . $chr . "</$cchr>"
        : defined $str ? "<$cstr>" . $str . "</$cstr>"
        :                "<$cerr></$cerr>"
    }xge;

    # Double quotes are escaped last because the string pattern above needs
    # to see them unescaped.
    $in =~ s{"}{&quot;}g;

    # Move whitespace captured in front of the '#' back outside the element.
    $in =~ s{(<\Q$cpre\E>)(\s+)}{$2$1}g;

    return $in;
}
# NOTE(review): the `sub <name> {` line that opens this wrapper is not
# visible here, so its name is unknown - restore it before relying on this.
# The body forwards the caller's arguments plus the DESC text to
# xm::o::args_stdin and returns that call's result unchanged.
return xm::o::args_stdin(@_, DESC); }
# NOTE(review): the `sub <name> {` line that opens this wrapper is not
# visible here either, so its name is unknown.
# The body passes the caller's arguments plus the DESC text through
# xm::o::args_stdin and hands whatever that returns to DO as its argument
# list, returning DO's result.
return DO(xm::o::args_stdin(@_, DESC)); }
1;