Annotation of embedaddon/pcre/perltest.pl, revision 1.1
1.1 ! misho 1: #! /usr/bin/env perl
! 2:
! 3: # Program for testing regular expressions with perl to check that PCRE handles
! 4: # them the same. This is the version that supports /8 for UTF-8 testing. As it
! 5: # stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
! 6: # have "use utf8" at the start for running the UTF-8 tests, but *not* for the
! 7: # other tests. The only way I've found for doing this is to cat this line in
! 8: # explicitly in the RunPerlTest script.
! 9:
! 10: # use locale; # With this included, \x0b matches \s!
! 11:
! 12: # Function for turning a string into a string of printing chars. There are
! 13: # currently problems with UTF-8 strings; this fudges round them.
! 14:
! 15: sub pchars {
! 16: my($t) = "";
! 17:
! 18: if ($utf8)
! 19: {
! 20: @p = unpack('U*', $_[0]);
! 21: foreach $c (@p)
! 22: {
! 23: if ($c >= 32 && $c < 127) { $t .= chr $c; }
! 24: else { $t .= sprintf("\\x{%02x}", $c); }
! 25: }
! 26: }
! 27:
! 28: else
! 29: {
! 30: foreach $c (split(//, $_[0]))
! 31: {
! 32: if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
! 33: else { $t .= sprintf("\\x%02x", ord $c); }
! 34: }
! 35: }
! 36:
! 37: $t;
! 38: }
! 39:
! 40:
! 41: # Read lines from named file or stdin and write to named file or stdout; lines
! 42: # consist of a regular expression, in delimiters and optionally followed by
! 43: # options, followed by a set of test data, terminated by an empty line.
! 44:
! 45: # Sort out the input and output files
! 46:
! 47: if (@ARGV > 0)
! 48: {
! 49: open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
! 50: $infile = "INFILE";
! 51: }
! 52: else { $infile = "STDIN"; }
! 53:
! 54: if (@ARGV > 1)
! 55: {
! 56: open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
! 57: $outfile = "OUTFILE";
! 58: }
! 59: else { $outfile = "STDOUT"; }
! 60:
! 61: printf($outfile "Perl $] Regular Expressions\n\n");
! 62:
! 63: # Main loop
! 64:
! 65: NEXT_RE:
! 66: for (;;)
! 67: {
! 68: printf " re> " if $infile eq "STDIN";
! 69: last if ! ($_ = <$infile>);
! 70: printf $outfile "$_" if $infile ne "STDIN";
! 71: next if ($_ eq "");
! 72:
! 73: $pattern = $_;
! 74:
! 75: while ($pattern !~ /^\s*(.).*\1/s)
! 76: {
! 77: printf " > " if $infile eq "STDIN";
! 78: last if ! ($_ = <$infile>);
! 79: printf $outfile "$_" if $infile ne "STDIN";
! 80: $pattern .= $_;
! 81: }
! 82:
! 83: chomp($pattern);
! 84: $pattern =~ s/\s+$//;
! 85:
! 86: # The private /+ modifier means "print $' afterwards".
! 87:
! 88: $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
! 89:
! 90: # A doubled version is used by pcretest to print remainders after captures
! 91:
! 92: $pattern =~ s/\+(?=[a-zA-Z]*$)//;
! 93:
! 94: # Remove /8 from a UTF-8 pattern.
! 95:
! 96: $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
! 97:
! 98: # Remove /J from a pattern with duplicate names.
! 99:
! 100: $pattern =~ s/J(?=[a-zA-Z]*$)//;
! 101:
! 102: # Remove /K from a pattern (asks pcretest to check MARK data) */
! 103:
! 104: $pattern =~ s/K(?=[a-zA-Z]*$)//;
! 105:
! 106: # Remove /W from a pattern (asks pcretest to set PCRE_UCP)
! 107:
! 108: $pattern =~ s/W(?=[a-zA-Z]*$)//;
! 109:
! 110: # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
! 111:
! 112: $pattern =~ s/S(?=[a-zA-Z]*$)//g;
! 113:
! 114: # Remove /Y from a pattern (asks pcretest to disable PCRE optimization)
! 115:
! 116: $pattern =~ s/Y(?=[a-zA-Z]*$)//;
! 117:
! 118: # Check that the pattern is valid
! 119:
! 120: eval "\$_ =~ ${pattern}";
! 121: if ($@)
! 122: {
! 123: printf $outfile "Error: $@";
! 124: next NEXT_RE;
! 125: }
! 126:
! 127: # If the /g modifier is present, we want to put a loop round the matching;
! 128: # otherwise just a single "if".
! 129:
! 130: $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
! 131:
! 132: # If the pattern is actually the null string, Perl uses the most recently
! 133: # executed (and successfully compiled) regex is used instead. This is a
! 134: # nasty trap for the unwary! The PCRE test suite does contain null strings
! 135: # in places - if they are allowed through here all sorts of weird and
! 136: # unexpected effects happen. To avoid this, we replace such patterns with
! 137: # a non-null pattern that has the same effect.
! 138:
! 139: $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
! 140:
! 141: # Read data lines and test them
! 142:
! 143: for (;;)
! 144: {
! 145: printf "data> " if $infile eq "STDIN";
! 146: last NEXT_RE if ! ($_ = <$infile>);
! 147: chomp;
! 148: printf $outfile "$_\n" if $infile ne "STDIN";
! 149:
! 150: s/\s+$//; # Remove trailing space
! 151: s/^\s+//; # Remove leading space
! 152: s/\\Y//g; # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
! 153:
! 154: last if ($_ eq "");
! 155: $x = eval "\"$_\""; # To get escapes processed
! 156:
! 157: # Empty array for holding results, ensure $REGERROR and $REGMARK are
! 158: # unset, then do the matching.
! 159:
! 160: @subs = ();
! 161:
! 162: $pushes = "push \@subs,\$&;" .
! 163: "push \@subs,\$1;" .
! 164: "push \@subs,\$2;" .
! 165: "push \@subs,\$3;" .
! 166: "push \@subs,\$4;" .
! 167: "push \@subs,\$5;" .
! 168: "push \@subs,\$6;" .
! 169: "push \@subs,\$7;" .
! 170: "push \@subs,\$8;" .
! 171: "push \@subs,\$9;" .
! 172: "push \@subs,\$10;" .
! 173: "push \@subs,\$11;" .
! 174: "push \@subs,\$12;" .
! 175: "push \@subs,\$13;" .
! 176: "push \@subs,\$14;" .
! 177: "push \@subs,\$15;" .
! 178: "push \@subs,\$16;" .
! 179: "push \@subs,\$'; }";
! 180:
! 181: undef $REGERROR;
! 182: undef $REGMARK;
! 183:
! 184: eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
! 185:
! 186: if ($@)
! 187: {
! 188: printf $outfile "Error: $@\n";
! 189: next NEXT_RE;
! 190: }
! 191: elsif (scalar(@subs) == 0)
! 192: {
! 193: printf $outfile "No match";
! 194: if (defined $REGERROR && $REGERROR != 1)
! 195: { print $outfile (", mark = $REGERROR"); }
! 196: printf $outfile "\n";
! 197: }
! 198: else
! 199: {
! 200: while (scalar(@subs) != 0)
! 201: {
! 202: printf $outfile (" 0: %s\n", &pchars($subs[0]));
! 203: printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
! 204: $last_printed = 0;
! 205: for ($i = 1; $i <= 16; $i++)
! 206: {
! 207: if (defined $subs[$i])
! 208: {
! 209: while ($last_printed++ < $i-1)
! 210: { printf $outfile ("%2d: <unset>\n", $last_printed); }
! 211: printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
! 212: $last_printed = $i;
! 213: }
! 214: }
! 215: splice(@subs, 0, 18);
! 216: }
! 217: if (defined $REGMARK && $REGMARK != 1)
! 218: { print $outfile ("MK: $REGMARK\n"); }
! 219: }
! 220: }
! 221: }
! 222:
! 223: # printf $outfile "\n";
! 224:
! 225: # End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>