version 1.1, 2012/02/21 23:05:51
|
version 1.1.1.4, 2014/06/15 19:46:04
|
Line 1
|
Line 1
|
#! /usr/bin/env perl |
#! /usr/bin/env perl |
|
|
# Program for testing regular expressions with perl to check that PCRE handles |
# Program for testing regular expressions with perl to check that PCRE handles |
# them the same. This is the version that supports /8 for UTF-8 testing. As it | # them the same. This version needs to have "use utf8" at the start for running |
# stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to | # the UTF-8 tests, but *not* for the other tests. The only way I've found for |
# have "use utf8" at the start for running the UTF-8 tests, but *not* for the | # doing this is to cat this line in explicitly in the RunPerlTest script. I've |
# other tests. The only way I've found for doing this is to cat this line in | # also used this method to supply "require Encode" for the UTF-8 tests, so that |
# explicitly in the RunPerlTest script. | # the main test will still run where Encode is not installed. |
|
|
# use locale; # With this included, \x0b matches \s! | #use utf8; |
| #require Encode; |
|
|
# Function for turning a string into a string of printing chars. There are | # Function for turning a string into a string of printing chars. |
# currently problems with UTF-8 strings; this fudges round them. | |
|
|
sub pchars { |
sub pchars { |
my($t) = ""; |
my($t) = ""; |
Line 21 if ($utf8)
|
Line 21 if ($utf8)
|
foreach $c (@p) |
foreach $c (@p) |
{ |
{ |
if ($c >= 32 && $c < 127) { $t .= chr $c; } |
if ($c >= 32 && $c < 127) { $t .= chr $c; } |
else { $t .= sprintf("\\x{%02x}", $c); } | else { $t .= sprintf("\\x{%02x}", $c); |
| } |
} |
} |
} |
} |
|
|
else |
else |
{ |
{ |
foreach $c (split(//, $_[0])) |
foreach $c (split(//, $_[0])) |
Line 68 for (;;)
|
Line 68 for (;;)
|
printf " re> " if $infile eq "STDIN"; |
printf " re> " if $infile eq "STDIN"; |
last if ! ($_ = <$infile>); |
last if ! ($_ = <$infile>); |
printf $outfile "$_" if $infile ne "STDIN"; |
printf $outfile "$_" if $infile ne "STDIN"; |
next if ($_ eq ""); | next if ($_ =~ /^\s*$/ || $_ =~ /^< forbid/); |
|
|
$pattern = $_; |
$pattern = $_; |
|
|
Line 103 for (;;)
|
Line 103 for (;;)
|
|
|
$pattern =~ s/K(?=[a-zA-Z]*$)//; |
$pattern =~ s/K(?=[a-zA-Z]*$)//; |
|
|
# Remove /W from a pattern (asks pcretest to set PCRE_UCP) | # /W asks pcretest to set PCRE_UCP; change this to /u for Perl |
|
|
$pattern =~ s/W(?=[a-zA-Z]*$)//; | $pattern =~ s/W(?=[a-zA-Z]*$)/u/; |
|
|
# Remove /S or /SS from a pattern (asks pcretest to study or not to study) |
# Remove /S or /SS from a pattern (asks pcretest to study or not to study) |
|
|
$pattern =~ s/S(?=[a-zA-Z]*$)//g; |
$pattern =~ s/S(?=[a-zA-Z]*$)//g; |
|
|
# Remove /Y from a pattern (asks pcretest to disable PCRE optimization) | # Remove /Y and /O from a pattern (disable PCRE optimizations) |
|
|
$pattern =~ s/Y(?=[a-zA-Z]*$)//; | $pattern =~ s/[YO](?=[a-zA-Z]*$)//; |
|
|
# Check that the pattern is valid |
# Check that the pattern is valid |
|
|
Line 192 for (;;)
|
Line 192 for (;;)
|
{ |
{ |
printf $outfile "No match"; |
printf $outfile "No match"; |
if (defined $REGERROR && $REGERROR != 1) |
if (defined $REGERROR && $REGERROR != 1) |
{ print $outfile (", mark = $REGERROR"); } | { printf $outfile (", mark = %s", &pchars($REGERROR)); } |
printf $outfile "\n"; |
printf $outfile "\n"; |
} |
} |
else |
else |
Line 214 for (;;)
|
Line 214 for (;;)
|
} |
} |
splice(@subs, 0, 18); |
splice(@subs, 0, 18); |
} |
} |
|
|
|
# It seems that $REGMARK is not marked as UTF-8 even when use utf8 is |
|
# set and the input pattern was a UTF-8 string. We can, however, force |
|
# it to be so marked. |
|
|
if (defined $REGMARK && $REGMARK != 1) |
if (defined $REGMARK && $REGMARK != 1) |
{ print $outfile ("MK: $REGMARK\n"); } | { |
| $xx = $REGMARK; |
| $xx = Encode::decode_utf8($xx) if $utf8; |
| printf $outfile ("MK: %s\n", &pchars($xx)); |
| } |
} |
} |
} |
} |
} |
} |