package xm::cdocs;
use strict;
use xm::o;
use xm::sub;
"
here we scan for CDOCs - up to now they will probably be completly mark_off'd,
so this subroutine mark_on all of it and tries to figure out which of the
angles [<>] could actually be some xml'ish markup. It will leave those and
just mark_off the rest. Sure, here we use some heuristics and it is up to
you to enhance the heuristics gradually over time.
"}
my $in = shift;
$in =~ s{\&26;} {\&\;}gs;
$in =~ s{\<} {\<\;}gs;
$in =~ s{\>} {\>\;}gs;
$in =~ s{\"} {\"\;}gs;
return $in;
}
sub do_litstr
{
my $in = shift;
$in =~ s{\\([^\d\w])} { xm::sub::off($1) }gs; # handle backslash-escapes
$in =~ s{\&26\;} {\&\;}gs;
$in =~ s{\<} {\<\;}gs;
$in =~ s{\>} {\>\;}gs;
$in =~ s{\"} {\"\;}gs;
return $in;
}
# be smart about accepting comment text, the sub will try to recognize any
# <...>-sequence that looks like xml. If there are angles that do not look like
# xml-tag they'll be replaced with their resp. entity-refs. Even more, this code
# does also introduce the ref-markup.
sub do_ccc
{
my $in = shift;
$in = xm::sub::on($in);
$in =~ s{\&} {\&\#26\;}gs;
$in =~ s{=> (\s+)(\S+)(\s|$)} {"&[;XREF&];".do_litnam($2)."&[;/XREF&];".$3}gmex;
$in =~ s{=> \"((?:[^\\\"]|\\.)*)\"} {"&[;XREF&];".do_litstr($1)."&[;/XREF&];"}gsex;
$in =~ s{=> \'((?:[^\\\']|\\.)*)\'} {"&[;XREF&];".do_litstr($1)."&[;/XREF&];"}gsex;
$in =~ s{\<([^<>!=?]*)\>} {&[;$1&];}gm; $in =~ s{<([/!?])([^<>]*)>} {&[;$1$2&];}gm; # find </*>-tags
$in =~ s{<(\w+\s+\w+=)([^<>]*)>} {&[;$1$2&];}gs; $in =~ s{<} {<}gs;
$in =~ s{>} {>}gs;
$in =~ s{\&\[\;} {\<}gs;
$in =~ s{\&\]\;} {\>}gs;
return $in;
}
my $in = shift;
$in =~ s{ (<CDOC>) ( (?:.(?!</?CDOC>))* .) (</CDOC>) }
{ $1.do_ccc($2).$3 }gsex;
return $in;
}
return xm::o::args_stdin(@_,DESC); }
return DO(xm::o::args_stdin(@_,DESC)); }
1;