package xm::code::func; use xm::code::vars; use xm::sub; use xm::o; *REGEX = \0; *STARTTAG = \1; *ENDTAG = \2; *CHILDREGEX = \3; *POSFROM = \4; *POSUPTO = \5; *INDEX = \6; sub DOC {" functions to process a rule-table which came from Peter Palfrader's code2html Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. "} sub put_output { my ( $params, $snippetlist_ref, $STYLE_REF ) = @_ ; my $result; my $prefix = ''; $prefix = $params->{'line_number_prefix'}.'_' if defined $params->{'line_number_prefix'}; $result = & { $ { $$STYLE_REF{'linenumbers'} }{$params->{'linenumbers'}} } (join ('', @$snippetlist_ref), $prefix); print FILEHANDLE $result unless (defined $params->{'dont_print_output'} && $params->{'dont_print_output'}); print FILEHANDLE $$STYLE_REF{'footer'} unless $params->{'noheader'}; if (defined($params->{'outfile'})) { unless ($params->{'outfile'} eq '-'){ close (FILEHANDLE); close (STDOUT); open (STDOUT, ">&SAVEOUT"); }; }; return $result; }; sub create_snippetlist { my ( $regexps_ref, $code, $snippetlist_ref, $style_ref ) = @_ ; my $length = length( $code ); ## An array of regular expression sturctures, each of which is an ## array. @res is kept sorted by starting position of the RExen and ## then by the position of the regex in the language file. This allows ## us to just evaluate $res[0], and to hand write fast code that typically ## handles 90% of the cases without resorting to the _big_ guns. ## ## FWIW, I pronounce '@res' REEZE, as in the plural of '$re'. ## my @res ; my $pos ; my $starttag; my $endtag; for ( @$regexps_ref ) { pos( $code ) = 0 ; #++$m ; next unless $code =~ m/($_->{regex})/gms ; # (rule matches any line in the code) # print STDERR "<regex style=\"$_->{name}\">",$_->{regex}, "</regex>\n" ; $starttag = $_->{starttag}; $endtag = $_->{endtag}; if (not defined $starttag) { $starttag = "<is-".$_->{style}; $starttag =~ s/ /-/g; $starttag .= " rule=\"".xm::sub::off($_->{name})."\"" if defined $$style_ref{v}; $starttag .= ">"; } if (not defined $endtag) { $endtag = "</is-".$_->{style}.">"; $endtag =~ s/ /-/g; } $pos = pos( $code ) ; # (the first line that matched with this type of rule) # $res[@res] = [ # $_->{regex}, # $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'start' }, # $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'stop' }, # $_->{childregex}, # $pos - length( $1 ), # $pos, # scalar( @res ), # ] ; $res[@res] = [ $_->{regex}, # *REGEX = 0 $starttag, # *STARTTAG = 1 $endtag, # *ENDTAG = 2 $_->{childregex}, # *CHILDREGEX = 3 $pos - length( $1 ), # *POSFROM = 4 $pos, # *POSUPTO = 5 scalar( @res ), # *INDEX = 6 ] ; } ## 90% of all child regexes end up with 0 or 1 regex that needs to be ## worried about. Trimming out the 0's speeds things up a bit and ## makes the below loop simpler, since there's always at least ## 1 regexp. It donsn't speed things up much by itself: the percentage ## of times this fires is really small. But it does simplify the loop ## below and speed it up. unless ( @res ) { push @$snippetlist_ref, xm::sub::off($code) ; return ; ## see RECURSE below in MAIN part } @res = sort { $a->[$POSFROM] <=> $b->[$POSFROM] || $a->[$INDEX] <=> $b->[$INDEX] } @res ; ## Add a dummy at the end, which makes the logic below simpler / faster. $res[@res] = [ undef, undef, undef, undef, $length, $length, scalar( @res ), ] ; ## These are declared here for (minor) speed improvement. my $re ; my $match_spos ; my $match_pos ; my $re_spos ; my $re_pos ; my $re_num ; my $prefix ; my $snippet ; my $rest ; my $i ; my $l ; my @changed_res ; my $j ; $pos = 0 ; MAIN: while ( $pos < $length ) { $re = $res[0] ; $match_spos = $re->[$POSFROM] ; $match_pos = $re->[$POSUPTO] ; if ( $match_spos > $pos ) { $prefix = xm::sub::off(substr( $code, $pos, $match_spos - $pos )) ; push @$snippetlist_ref, $prefix ; } if ( $match_pos > $match_spos ) { $snippet = substr( $code, $match_spos, $match_pos - $match_spos ) ; if ( @{$re->[$CHILDREGEX]} ) { push @$snippetlist_ref, $re->[$STARTTAG] ; create_snippetlist( $re->[$CHILDREGEX], ## RECURSE !!!!!!! $snippet, $snippetlist_ref, $style_ref ) ; push @$snippetlist_ref, $re->[$ENDTAG] ; } else { push @$snippetlist_ref, $re->[$STARTTAG], xm::sub::off($snippet), $re->[$ENDTAG]; } } $pos = $match_pos ; ## ## Hand coded optimizations. Luckily, the cases that arise most often ## are the easiest to tune. ## # =pod if ( $res[1]->[4] >= $pos ) { ## Only first regex needs to be moved, 2nd and later are still valid. ## This is often 90% of the cases for Perl or C (others not tested, ## just uncomment the $n, $o, and $p lines and try it yourself). #++$n{1} ; #++$m ; pos( $code ) = $pos ; unless ( $code =~ m/($re->[$REGEX])/gms ) { #++$o{'0'} ; if ( @res == 2 ) { ## If the only regexp left is the dummy, we're done. $rest = xm::sub::off(substr( $code, $pos )) ; push @$snippetlist_ref, $rest ; last ; } shift @res ; } else { $re->[$POSUPTO] = $re_pos = pos( $code ) ; $re->[$POSFROM] = $re_spos = $re_pos - length( $1 ) ; ## Walk down the array looking for $re's new home. ## The first few loop iterations are unrolled and done manually ## for speed, which handles 85 to 90% of the cases where only ## $re needs to be moved. ## ## Here's where that dummy regexp at the end of the array comes ## in handy: we don't need to worry about array size here, since ## it will always be after $re no matter what. The unrolled ## loop stuff is outdented to make the conditionals fit on one ## 80 char line. ## Element 4 in @{$res[x]} is the start position of the match. ## Element 6 is the order in which it was declared in the lang file. $re_num = $re->[$INDEX] ; if ( ( $re_spos <=> $res[1]->[$POSFROM] || $re_num <=> $res[1]->[$INDEX] ) <= 0 ) { #++$o{'1'} ; next } $res[0] = $res[1] ; #++$o{'2'} ; if ( ( $re_spos <=> $res[2]->[$POSFROM] || $re_num <=> $res[2]->[$INDEX] ) <= 0 ) { $res[1] = $re ; next ; } $res[1] = $res[2] ; if ( ( $re_spos <=> $res[3]->[$POSFROM] || $re_num <=> $res[3]->[$INDEX] ) <= 0 ) { #++$o{'3'} ; $res[2] = $re ; next ; } $res[2] = $res[3] ; if ( ( $re_spos <=> $res[4]->[$POSFROM] || $re_num <=> $res[4]->[$INDEX] ) <= 0 ) { #++$o{'3'} ; $res[3] = $re ; next ; } $res[3] = $res[4] ; if ( ( $re_spos <=> $res[5]->[$POSFROM] || $re_num <=> $res[5]->[$INDEX] ) <= 0 ) { #++$o{'4'} ; $res[4] = $re ; next ; } $res[4] = $res[5] ; #++$o{'ugh'} ; $i = 6 ; $l = $#res ; for ( ; $i < $l ; ++$i ) { last if ( ( $re_spos <=> $res[$i]->[$POSFROM] || $re_num <=> $res[$i]->[$INDEX] ) <= 0 ) ; $res[$i-1] = $res[$i] ; } #++$p{sprintf( "%2d", $i )} ; $res[$i-1] = $re ; } next ; } # =cut ## ## End optimizations. You can comment them all out and this net ## does all the work, just more slowly. If you do that, then ## you also need to comment out the code below that deals with ## the second entry in @res. ## #my $ni = 0 ; ## First re always needs to be tweaked #++$m ; #++$ni ; pos( $code ) = $pos ; unless ( $code =~ m/($re->[$REGEX])/gms ) { if ( @res == 2 ) { ## If the only regexp left is the dummy, we're done. $rest = xm::sub::off(substr( $code, $pos )) ; push @$snippetlist_ref, $rest ; last ; } shift @res ; @changed_res = () ; $i = 0 ; } else { $re->[$POSUPTO] = $re_pos = pos( $code ) ; $re->[$POSFROM] = $re_pos - length( $1 ) ; @changed_res = ( $re ) ; $i = 1 ; } ## If the optimizations above are in, the second one always ## needs to be tweaked, too. $re = $res[$i] ; #++$m ; #++$ni ; pos( $code ) = $pos ; unless ( $code =~ m/($re->[$REGEX])/gms ) { if ( @res == 2 ) { ## If the only regexp left is the dummy, we're done. $rest = xm::sub::off(substr( $code, $pos )) ; push @$snippetlist_ref, $rest ; last ; } shift @res ; } else { $re->[$POSUPTO] = $re_pos = pos( $code ) ; $re->[$POSFROM] = $re_spos = $re_pos - length( $1 ) ; if ( @changed_res && ( $changed_res[0]->[$POSFROM] <=> $re_spos || $changed_res[0]->[$INDEX] <=> $re->[$INDEX] ) > 0 ) { unshift @changed_res, $re ; } else { $changed_res[$i] = $re ; } ++$i ; } for ( ; ; ++$i ) { local $_ = $res[$i] ; #++$m ; last if $_->[$POSFROM] >= $pos ; #++$ni ; #++$m ; pos( $code ) = $pos ; unless ( $code =~ m/($_->[$REGEX])/gms ) { if ( @res <= 2 ) { $rest = xm::sub::off(substr( $code, $pos )) ; push @$snippetlist_ref, $rest ; last MAIN ; } ## If this regex is no longer needed, remove it by not pushing it ## on to @changed_res. This means we need one less slot in @res. shift @res ; redo ; } $_->[$POSUPTO] = $re_pos = pos( $code ) ; $_->[$POSFROM] = $re_spos = $re_pos - length( $1 ) ; ## Insertion sort in to @changed_res $re_num = $_->[6] ; for ( $j = $#changed_res ; $j > -1 ; --$j ) { last if ( ( $changed_res[$j]->[$POSFROM] <=> $re_spos || $changed_res[$j]->[$INDEX] <=> $re_num ) < 0 ) ; $changed_res[$j+1] = $changed_res[$j] ; } $changed_res[$j+1] = $_ ; } ## Merge sort @changed_res and @res in to @res $j = 0 ; $l = $#res ; for ( @changed_res ) { while ( $i < $l && ( $_->[$POSFROM] <=> $res[$i]->[$POSFROM] || $_->[$INDEX] <=> $res[$i]->[$INDEX] ) > 0 ) { $res[$j++] = $res[$i++] ; } $res[$j++] = $_ ; } # =cut } }; ##sub create_snippetlist ## { ## my ( $regexps_ref, $code, $snippetlist_ref ) = @_ ; ## my $length = length( $code ); ## my @regexps; ## $regexps[scalar(@$regexps_ref)] = undef; ## my $head_ptr = undef; ## my $current_ptr; ## my $help_ptr; ## my $index = 0; ## for (@$regexps_ref) ## { ## $current_ptr = $regexps[$index]; #0: start_ptr 1: length 2: next_ptr, 3: regex, 4:start, 5:end, 6: child 7: index ## $current_ptr->[7] = $index++; ## $current_ptr->[6] = $$_{'childregex'}; ## $current_ptr->[5] = $$_{'endtag'}; ## $current_ptr->[4] = $$_{'starttag'}; ## $current_ptr->[3] = $$_{'regex'}; ## pos( $code ) = 0; ## if ( $code =~ /($current_ptr->[3])/gms ) { $current_ptr->[0] = pos ($code) - length($1); $current_ptr->[1] = length($1); } else {next}; ## if (!defined ($head_ptr) || $current_ptr->[0] < $head_ptr->[0] ) ## { ## $current_ptr->[2] = $head_ptr; ## $head_ptr = $current_ptr; ## } ## else ## { ## $help_ptr = $head_ptr; ## $help_ptr = $help_ptr->[2] ## while (defined ( $help_ptr->[2] ) && ($current_ptr->[0] >= $help_ptr->[2]->[0]) ); #iow: while (defined help->next && current->pos <= help->next->pos) ## $current_ptr->[2] = $help_ptr->[2]; ## $help_ptr->[2] = $current_ptr; ## }; ## }; ## my $endpos = 0; ## my $oldhead; ## my %entities ; ## $entities{'&'} = '&' ; ## $entities{'<'} = '<' ; ## $entities{'>'} = '>' ; ## $entities{'"'} = '"' ; ## my $snippet; ## while (defined $head_ptr) ## { ## if ($head_ptr->[0] - $endpos > 0) { ## $snippet = substr($code, $endpos, $head_ptr->[0] - $endpos); ## $snippet =~ s/($ENTITIES)/$ENTITIES{$1}/ge; #"]); ## push @$snippetlist_ref, $snippet; ## }; ## push @$snippetlist_ref, $head_ptr->[4]; ## &create_snippetlist( $head_ptr->[6], substr($code, $head_ptr->[0], $head_ptr->[1]) , $snippetlist_ref); ## push @$snippetlist_ref, $head_ptr->[5]; ## $endpos = $head_ptr->[0] + $head_ptr->[1]; ## # update & repair list : ## $oldhead = $head_ptr; ## # 1) shift now invalid matches from list ## $help_ptr = $head_ptr; ## $help_ptr = $help_ptr->[2] ## while (defined ( $help_ptr->[2] ) && ($endpos > $help_ptr->[2]->[0]) ); ## $head_ptr = $help_ptr->[2]; ## $help_ptr->[2] = undef; ## # 2) rematch invalid matches and insert them into the list ## while (defined $oldhead) ## { ## $current_ptr = $oldhead; ## $oldhead = $oldhead->[2]; ## pos( $code ) = $endpos; ## if ( $code =~ /($current_ptr->[3])/gms ) { $current_ptr->[0] = pos ($code) - length($1); $current_ptr->[1] = length($1); } else {next}; ## if (!defined ($head_ptr) || ## ($current_ptr->[0] < $head_ptr->[0]) || ## ( ## ( $current_ptr->[0] == $head_ptr->[0]) && ## ( $current_ptr->[7] < $head_ptr->[7]) ## ) ## ) ## { ## $current_ptr->[2] = $head_ptr; ## $head_ptr = $current_ptr; ## } ## else ## { ## $help_ptr = $head_ptr; ## $help_ptr = $help_ptr->[2] ## while (defined ( $help_ptr->[2] ) && ## ( ## ($current_ptr->[0] > $help_ptr->[2]->[0]) || ## ( ## ( $current_ptr->[0] == $help_ptr->[2]->[0]) && ## ( $current_ptr->[7] > $help_ptr->[2]->[7]) ## ) ## ) ## ); #iow: while (defined help->next && current->pos <= help->next->pos) # if two patterns match at the same pos ## # the one that was declared earlier is taken ## $current_ptr->[2] = $help_ptr->[2]; ## $help_ptr->[2] = $current_ptr; ## }; ## }; ## # 3) done ## }; ## $snippet = substr($code, $endpos); $snippet =~ s/($ENTITIES)/$ENTITIES{$1}/ge; #" ]); ## push @$snippetlist_ref, $snippet; ## }; sub DO { my @snippetlist; my %styleref = { }; $styleref{v} = $o{verbose} if exists $o{verbose}; $styleref{v} = $o{"code-types"} if exists $o{"code-types"}; create_snippetlist ($_[1],$_[0],\@snippetlist, \%styleref); # create_snippetlist ($LANGUAGE{$_[1]}{patterns},$_[0],\@snippetlist, \%styleref); return join ('',@snippetlist ); }