Annotation of embedaddon/pcre/perltest.pl, revision 1.1.1.4
1.1 misho 1: #! /usr/bin/env perl
2:
3: # Program for testing regular expressions with perl to check that PCRE handles
1.1.1.4 ! misho 4: # them the same. This version needs to have "use utf8" at the start for running
! 5: # the UTF-8 tests, but *not* for the other tests. The only way I've found for
! 6: # doing this is to cat this line in explicitly in the RunPerlTest script. I've
! 7: # also used this method to supply "require Encode" for the UTF-8 tests, so that
! 8: # the main test will still run where Encode is not installed.
1.1.1.2 misho 9:
1.1.1.3 misho 10: #use utf8;
1.1.1.2 misho 11: #require Encode;
1.1 misho 12:
1.1.1.4 ! misho 13: # Function for turning a string into a string of printing chars.
! 14:
1.1 misho 15: sub pchars {
16: my($t) = "";
17:
18: if ($utf8)
19: {
20: @p = unpack('U*', $_[0]);
21: foreach $c (@p)
22: {
23: if ($c >= 32 && $c < 127) { $t .= chr $c; }
1.1.1.2 misho 24: else { $t .= sprintf("\\x{%02x}", $c);
25: }
1.1 misho 26: }
27: }
28: else
29: {
30: foreach $c (split(//, $_[0]))
31: {
32: if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
33: else { $t .= sprintf("\\x%02x", ord $c); }
34: }
35: }
36:
37: $t;
38: }
39:
40:
41: # Read lines from named file or stdin and write to named file or stdout; lines
42: # consist of a regular expression, in delimiters and optionally followed by
43: # options, followed by a set of test data, terminated by an empty line.
44:
45: # Sort out the input and output files
46:
47: if (@ARGV > 0)
48: {
49: open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
50: $infile = "INFILE";
51: }
52: else { $infile = "STDIN"; }
53:
54: if (@ARGV > 1)
55: {
56: open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
57: $outfile = "OUTFILE";
58: }
59: else { $outfile = "STDOUT"; }
60:
61: printf($outfile "Perl $] Regular Expressions\n\n");
62:
63: # Main loop
64:
65: NEXT_RE:
66: for (;;)
67: {
68: printf " re> " if $infile eq "STDIN";
69: last if ! ($_ = <$infile>);
70: printf $outfile "$_" if $infile ne "STDIN";
1.1.1.4 ! misho 71: next if ($_ =~ /^\s*$/ || $_ =~ /^< forbid/);
1.1 misho 72:
73: $pattern = $_;
74:
75: while ($pattern !~ /^\s*(.).*\1/s)
76: {
77: printf " > " if $infile eq "STDIN";
78: last if ! ($_ = <$infile>);
79: printf $outfile "$_" if $infile ne "STDIN";
80: $pattern .= $_;
81: }
82:
83: chomp($pattern);
84: $pattern =~ s/\s+$//;
85:
86: # The private /+ modifier means "print $' afterwards".
87:
88: $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
89:
90: # A doubled version is used by pcretest to print remainders after captures
91:
92: $pattern =~ s/\+(?=[a-zA-Z]*$)//;
93:
94: # Remove /8 from a UTF-8 pattern.
95:
96: $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
97:
98: # Remove /J from a pattern with duplicate names.
99:
100: $pattern =~ s/J(?=[a-zA-Z]*$)//;
101:
102: # Remove /K from a pattern (asks pcretest to check MARK data) */
103:
104: $pattern =~ s/K(?=[a-zA-Z]*$)//;
105:
1.1.1.4 ! misho 106: # /W asks pcretest to set PCRE_UCP; change this to /u for Perl
1.1 misho 107:
1.1.1.4 ! misho 108: $pattern =~ s/W(?=[a-zA-Z]*$)/u/;
1.1 misho 109:
110: # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
111:
112: $pattern =~ s/S(?=[a-zA-Z]*$)//g;
113:
1.1.1.4 ! misho 114: # Remove /Y and /O from a pattern (disable PCRE optimizations)
1.1 misho 115:
1.1.1.4 ! misho 116: $pattern =~ s/[YO](?=[a-zA-Z]*$)//;
1.1 misho 117:
118: # Check that the pattern is valid
119:
120: eval "\$_ =~ ${pattern}";
121: if ($@)
122: {
123: printf $outfile "Error: $@";
124: next NEXT_RE;
125: }
126:
127: # If the /g modifier is present, we want to put a loop round the matching;
128: # otherwise just a single "if".
129:
130: $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
131:
132: # If the pattern is actually the null string, Perl uses the most recently
133: # executed (and successfully compiled) regex is used instead. This is a
134: # nasty trap for the unwary! The PCRE test suite does contain null strings
135: # in places - if they are allowed through here all sorts of weird and
136: # unexpected effects happen. To avoid this, we replace such patterns with
137: # a non-null pattern that has the same effect.
138:
139: $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
140:
141: # Read data lines and test them
142:
143: for (;;)
144: {
145: printf "data> " if $infile eq "STDIN";
146: last NEXT_RE if ! ($_ = <$infile>);
147: chomp;
148: printf $outfile "$_\n" if $infile ne "STDIN";
149:
150: s/\s+$//; # Remove trailing space
151: s/^\s+//; # Remove leading space
152: s/\\Y//g; # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
153:
154: last if ($_ eq "");
155: $x = eval "\"$_\""; # To get escapes processed
156:
157: # Empty array for holding results, ensure $REGERROR and $REGMARK are
158: # unset, then do the matching.
159:
160: @subs = ();
161:
162: $pushes = "push \@subs,\$&;" .
163: "push \@subs,\$1;" .
164: "push \@subs,\$2;" .
165: "push \@subs,\$3;" .
166: "push \@subs,\$4;" .
167: "push \@subs,\$5;" .
168: "push \@subs,\$6;" .
169: "push \@subs,\$7;" .
170: "push \@subs,\$8;" .
171: "push \@subs,\$9;" .
172: "push \@subs,\$10;" .
173: "push \@subs,\$11;" .
174: "push \@subs,\$12;" .
175: "push \@subs,\$13;" .
176: "push \@subs,\$14;" .
177: "push \@subs,\$15;" .
178: "push \@subs,\$16;" .
179: "push \@subs,\$'; }";
180:
181: undef $REGERROR;
182: undef $REGMARK;
183:
184: eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
185:
186: if ($@)
187: {
188: printf $outfile "Error: $@\n";
189: next NEXT_RE;
190: }
191: elsif (scalar(@subs) == 0)
192: {
193: printf $outfile "No match";
194: if (defined $REGERROR && $REGERROR != 1)
1.1.1.2 misho 195: { printf $outfile (", mark = %s", &pchars($REGERROR)); }
1.1 misho 196: printf $outfile "\n";
197: }
198: else
199: {
200: while (scalar(@subs) != 0)
201: {
202: printf $outfile (" 0: %s\n", &pchars($subs[0]));
203: printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
204: $last_printed = 0;
205: for ($i = 1; $i <= 16; $i++)
206: {
207: if (defined $subs[$i])
208: {
209: while ($last_printed++ < $i-1)
210: { printf $outfile ("%2d: <unset>\n", $last_printed); }
211: printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
212: $last_printed = $i;
213: }
214: }
215: splice(@subs, 0, 18);
216: }
1.1.1.2 misho 217:
218: # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
219: # set and the input pattern was a UTF-8 string. We can, however, force
220: # it to be so marked.
221:
1.1 misho 222: if (defined $REGMARK && $REGMARK != 1)
1.1.1.2 misho 223: {
224: $xx = $REGMARK;
225: $xx = Encode::decode_utf8($xx) if $utf8;
226: printf $outfile ("MK: %s\n", &pchars($xx));
227: }
1.1 misho 228: }
229: }
230: }
231:
232: # printf $outfile "\n";
233:
234: # End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>