Annotation of embedaddon/pcre/perltest.pl, revision 1.1.1.3
1.1 misho 1: #! /usr/bin/env perl
2:
3: # Program for testing regular expressions with perl to check that PCRE handles
1.1.1.2 misho 4: # them the same. This version supports /8 for UTF-8 testing. However, it needs
5: # to have "use utf8" at the start for running the UTF-8 tests, but *not* for
6: # the other tests. The only way I've found for doing this is to cat this line
7: # in explicitly in the RunPerlTest script. I've also used this method to supply
8: # "require Encode" for the UTF-8 tests, so that the main test will still run
9: # where Encode is not installed.
1.1 misho 10:
11: # use locale; # With this included, \x0b matches \s!
12:
1.1.1.2 misho 13: # Function for turning a string into a string of printing chars.
14:
1.1.1.3 ! misho 15: #use utf8;
1.1.1.2 misho 16: #require Encode;
1.1 misho 17:
18: sub pchars {
19: my($t) = "";
20:
21: if ($utf8)
22: {
23: @p = unpack('U*', $_[0]);
24: foreach $c (@p)
25: {
26: if ($c >= 32 && $c < 127) { $t .= chr $c; }
1.1.1.2 misho 27: else { $t .= sprintf("\\x{%02x}", $c);
28: }
1.1 misho 29: }
30: }
31: else
32: {
33: foreach $c (split(//, $_[0]))
34: {
35: if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
36: else { $t .= sprintf("\\x%02x", ord $c); }
37: }
38: }
39:
40: $t;
41: }
42:
43:
44: # Read lines from named file or stdin and write to named file or stdout; lines
45: # consist of a regular expression, in delimiters and optionally followed by
46: # options, followed by a set of test data, terminated by an empty line.
47:
48: # Sort out the input and output files
49:
50: if (@ARGV > 0)
51: {
52: open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
53: $infile = "INFILE";
54: }
55: else { $infile = "STDIN"; }
56:
57: if (@ARGV > 1)
58: {
59: open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
60: $outfile = "OUTFILE";
61: }
62: else { $outfile = "STDOUT"; }
63:
64: printf($outfile "Perl $] Regular Expressions\n\n");
65:
66: # Main loop
67:
68: NEXT_RE:
69: for (;;)
70: {
71: printf " re> " if $infile eq "STDIN";
72: last if ! ($_ = <$infile>);
73: printf $outfile "$_" if $infile ne "STDIN";
74: next if ($_ eq "");
75:
76: $pattern = $_;
77:
78: while ($pattern !~ /^\s*(.).*\1/s)
79: {
80: printf " > " if $infile eq "STDIN";
81: last if ! ($_ = <$infile>);
82: printf $outfile "$_" if $infile ne "STDIN";
83: $pattern .= $_;
84: }
85:
86: chomp($pattern);
87: $pattern =~ s/\s+$//;
88:
89: # The private /+ modifier means "print $' afterwards".
90:
91: $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
92:
93: # A doubled version is used by pcretest to print remainders after captures
94:
95: $pattern =~ s/\+(?=[a-zA-Z]*$)//;
96:
97: # Remove /8 from a UTF-8 pattern.
98:
99: $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
100:
101: # Remove /J from a pattern with duplicate names.
102:
103: $pattern =~ s/J(?=[a-zA-Z]*$)//;
104:
105: # Remove /K from a pattern (asks pcretest to check MARK data) */
106:
107: $pattern =~ s/K(?=[a-zA-Z]*$)//;
108:
109: # Remove /W from a pattern (asks pcretest to set PCRE_UCP)
110:
111: $pattern =~ s/W(?=[a-zA-Z]*$)//;
112:
113: # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
114:
115: $pattern =~ s/S(?=[a-zA-Z]*$)//g;
116:
117: # Remove /Y from a pattern (asks pcretest to disable PCRE optimization)
118:
119: $pattern =~ s/Y(?=[a-zA-Z]*$)//;
120:
121: # Check that the pattern is valid
122:
123: eval "\$_ =~ ${pattern}";
124: if ($@)
125: {
126: printf $outfile "Error: $@";
127: next NEXT_RE;
128: }
129:
130: # If the /g modifier is present, we want to put a loop round the matching;
131: # otherwise just a single "if".
132:
133: $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
134:
135: # If the pattern is actually the null string, Perl uses the most recently
136: # executed (and successfully compiled) regex is used instead. This is a
137: # nasty trap for the unwary! The PCRE test suite does contain null strings
138: # in places - if they are allowed through here all sorts of weird and
139: # unexpected effects happen. To avoid this, we replace such patterns with
140: # a non-null pattern that has the same effect.
141:
142: $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
143:
144: # Read data lines and test them
145:
146: for (;;)
147: {
148: printf "data> " if $infile eq "STDIN";
149: last NEXT_RE if ! ($_ = <$infile>);
150: chomp;
151: printf $outfile "$_\n" if $infile ne "STDIN";
152:
153: s/\s+$//; # Remove trailing space
154: s/^\s+//; # Remove leading space
155: s/\\Y//g; # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
156:
157: last if ($_ eq "");
158: $x = eval "\"$_\""; # To get escapes processed
159:
160: # Empty array for holding results, ensure $REGERROR and $REGMARK are
161: # unset, then do the matching.
162:
163: @subs = ();
164:
165: $pushes = "push \@subs,\$&;" .
166: "push \@subs,\$1;" .
167: "push \@subs,\$2;" .
168: "push \@subs,\$3;" .
169: "push \@subs,\$4;" .
170: "push \@subs,\$5;" .
171: "push \@subs,\$6;" .
172: "push \@subs,\$7;" .
173: "push \@subs,\$8;" .
174: "push \@subs,\$9;" .
175: "push \@subs,\$10;" .
176: "push \@subs,\$11;" .
177: "push \@subs,\$12;" .
178: "push \@subs,\$13;" .
179: "push \@subs,\$14;" .
180: "push \@subs,\$15;" .
181: "push \@subs,\$16;" .
182: "push \@subs,\$'; }";
183:
184: undef $REGERROR;
185: undef $REGMARK;
186:
187: eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
188:
189: if ($@)
190: {
191: printf $outfile "Error: $@\n";
192: next NEXT_RE;
193: }
194: elsif (scalar(@subs) == 0)
195: {
196: printf $outfile "No match";
197: if (defined $REGERROR && $REGERROR != 1)
1.1.1.2 misho 198: { printf $outfile (", mark = %s", &pchars($REGERROR)); }
1.1 misho 199: printf $outfile "\n";
200: }
201: else
202: {
203: while (scalar(@subs) != 0)
204: {
205: printf $outfile (" 0: %s\n", &pchars($subs[0]));
206: printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
207: $last_printed = 0;
208: for ($i = 1; $i <= 16; $i++)
209: {
210: if (defined $subs[$i])
211: {
212: while ($last_printed++ < $i-1)
213: { printf $outfile ("%2d: <unset>\n", $last_printed); }
214: printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
215: $last_printed = $i;
216: }
217: }
218: splice(@subs, 0, 18);
219: }
1.1.1.2 misho 220:
221: # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
222: # set and the input pattern was a UTF-8 string. We can, however, force
223: # it to be so marked.
224:
1.1 misho 225: if (defined $REGMARK && $REGMARK != 1)
1.1.1.2 misho 226: {
227: $xx = $REGMARK;
228: $xx = Encode::decode_utf8($xx) if $utf8;
229: printf $outfile ("MK: %s\n", &pchars($xx));
230: }
1.1 misho 231: }
232: }
233: }
234:
235: # printf $outfile "\n";
236:
237: # End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>