Annotation of embedaddon/pcre/perltest.pl, revision 1.1.1.2
1.1 misho 1: #! /usr/bin/env perl
2:
3: # Program for testing regular expressions with perl to check that PCRE handles
1.1.1.2 ! misho 4: # them the same. This version supports /8 for UTF-8 testing. However, it needs
! 5: # to have "use utf8" at the start for running the UTF-8 tests, but *not* for
! 6: # the other tests. The only way I've found for doing this is to cat this line
! 7: # in explicitly in the RunPerlTest script. I've also used this method to supply
! 8: # "require Encode" for the UTF-8 tests, so that the main test will still run
! 9: # where Encode is not installed.
1.1 misho 10:
11: # use locale; # With this included, \x0b matches \s!
12:
1.1.1.2 ! misho 13: # Function for turning a string into a string of printing chars.
! 14:
! 15: #require Encode;
1.1 misho 16:
17: sub pchars {
18: my($t) = "";
19:
20: if ($utf8)
21: {
22: @p = unpack('U*', $_[0]);
23: foreach $c (@p)
24: {
25: if ($c >= 32 && $c < 127) { $t .= chr $c; }
1.1.1.2 ! misho 26: else { $t .= sprintf("\\x{%02x}", $c);
! 27: }
1.1 misho 28: }
29: }
30: else
31: {
32: foreach $c (split(//, $_[0]))
33: {
34: if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
35: else { $t .= sprintf("\\x%02x", ord $c); }
36: }
37: }
38:
39: $t;
40: }
41:
42:
43: # Read lines from named file or stdin and write to named file or stdout; lines
44: # consist of a regular expression, in delimiters and optionally followed by
45: # options, followed by a set of test data, terminated by an empty line.
46:
47: # Sort out the input and output files
48:
49: if (@ARGV > 0)
50: {
51: open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
52: $infile = "INFILE";
53: }
54: else { $infile = "STDIN"; }
55:
56: if (@ARGV > 1)
57: {
58: open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
59: $outfile = "OUTFILE";
60: }
61: else { $outfile = "STDOUT"; }
62:
63: printf($outfile "Perl $] Regular Expressions\n\n");
64:
65: # Main loop
66:
67: NEXT_RE:
68: for (;;)
69: {
70: printf " re> " if $infile eq "STDIN";
71: last if ! ($_ = <$infile>);
72: printf $outfile "$_" if $infile ne "STDIN";
73: next if ($_ eq "");
74:
75: $pattern = $_;
76:
77: while ($pattern !~ /^\s*(.).*\1/s)
78: {
79: printf " > " if $infile eq "STDIN";
80: last if ! ($_ = <$infile>);
81: printf $outfile "$_" if $infile ne "STDIN";
82: $pattern .= $_;
83: }
84:
85: chomp($pattern);
86: $pattern =~ s/\s+$//;
87:
88: # The private /+ modifier means "print $' afterwards".
89:
90: $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
91:
92: # A doubled version is used by pcretest to print remainders after captures
93:
94: $pattern =~ s/\+(?=[a-zA-Z]*$)//;
95:
96: # Remove /8 from a UTF-8 pattern.
97:
98: $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
99:
100: # Remove /J from a pattern with duplicate names.
101:
102: $pattern =~ s/J(?=[a-zA-Z]*$)//;
103:
104: # Remove /K from a pattern (asks pcretest to check MARK data) */
105:
106: $pattern =~ s/K(?=[a-zA-Z]*$)//;
107:
108: # Remove /W from a pattern (asks pcretest to set PCRE_UCP)
109:
110: $pattern =~ s/W(?=[a-zA-Z]*$)//;
111:
112: # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
113:
114: $pattern =~ s/S(?=[a-zA-Z]*$)//g;
115:
116: # Remove /Y from a pattern (asks pcretest to disable PCRE optimization)
117:
118: $pattern =~ s/Y(?=[a-zA-Z]*$)//;
119:
120: # Check that the pattern is valid
121:
122: eval "\$_ =~ ${pattern}";
123: if ($@)
124: {
125: printf $outfile "Error: $@";
126: next NEXT_RE;
127: }
128:
129: # If the /g modifier is present, we want to put a loop round the matching;
130: # otherwise just a single "if".
131:
132: $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
133:
134: # If the pattern is actually the null string, Perl uses the most recently
135: # executed (and successfully compiled) regex is used instead. This is a
136: # nasty trap for the unwary! The PCRE test suite does contain null strings
137: # in places - if they are allowed through here all sorts of weird and
138: # unexpected effects happen. To avoid this, we replace such patterns with
139: # a non-null pattern that has the same effect.
140:
141: $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
142:
143: # Read data lines and test them
144:
145: for (;;)
146: {
147: printf "data> " if $infile eq "STDIN";
148: last NEXT_RE if ! ($_ = <$infile>);
149: chomp;
150: printf $outfile "$_\n" if $infile ne "STDIN";
151:
152: s/\s+$//; # Remove trailing space
153: s/^\s+//; # Remove leading space
154: s/\\Y//g; # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
155:
156: last if ($_ eq "");
157: $x = eval "\"$_\""; # To get escapes processed
158:
159: # Empty array for holding results, ensure $REGERROR and $REGMARK are
160: # unset, then do the matching.
161:
162: @subs = ();
163:
164: $pushes = "push \@subs,\$&;" .
165: "push \@subs,\$1;" .
166: "push \@subs,\$2;" .
167: "push \@subs,\$3;" .
168: "push \@subs,\$4;" .
169: "push \@subs,\$5;" .
170: "push \@subs,\$6;" .
171: "push \@subs,\$7;" .
172: "push \@subs,\$8;" .
173: "push \@subs,\$9;" .
174: "push \@subs,\$10;" .
175: "push \@subs,\$11;" .
176: "push \@subs,\$12;" .
177: "push \@subs,\$13;" .
178: "push \@subs,\$14;" .
179: "push \@subs,\$15;" .
180: "push \@subs,\$16;" .
181: "push \@subs,\$'; }";
182:
183: undef $REGERROR;
184: undef $REGMARK;
185:
186: eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
187:
188: if ($@)
189: {
190: printf $outfile "Error: $@\n";
191: next NEXT_RE;
192: }
193: elsif (scalar(@subs) == 0)
194: {
195: printf $outfile "No match";
196: if (defined $REGERROR && $REGERROR != 1)
1.1.1.2 ! misho 197: { printf $outfile (", mark = %s", &pchars($REGERROR)); }
1.1 misho 198: printf $outfile "\n";
199: }
200: else
201: {
202: while (scalar(@subs) != 0)
203: {
204: printf $outfile (" 0: %s\n", &pchars($subs[0]));
205: printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
206: $last_printed = 0;
207: for ($i = 1; $i <= 16; $i++)
208: {
209: if (defined $subs[$i])
210: {
211: while ($last_printed++ < $i-1)
212: { printf $outfile ("%2d: <unset>\n", $last_printed); }
213: printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
214: $last_printed = $i;
215: }
216: }
217: splice(@subs, 0, 18);
218: }
1.1.1.2 ! misho 219:
! 220: # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
! 221: # set and the input pattern was a UTF-8 string. We can, however, force
! 222: # it to be so marked.
! 223:
1.1 misho 224: if (defined $REGMARK && $REGMARK != 1)
1.1.1.2 ! misho 225: {
! 226: $xx = $REGMARK;
! 227: $xx = Encode::decode_utf8($xx) if $utf8;
! 228: printf $outfile ("MK: %s\n", &pchars($xx));
! 229: }
1.1 misho 230: }
231: }
232: }
233:
234: # printf $outfile "\n";
235:
236: # End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>