Annotation of embedaddon/pcre/perltest.pl, revision 1.1

1.1     ! misho       1: #! /usr/bin/env perl
        !             2: 
        !             3: # Program for testing regular expressions with perl to check that PCRE handles
        !             4: # them the same. This is the version that supports /8 for UTF-8 testing. As it
        !             5: # stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
        !             6: # have "use utf8" at the start for running the UTF-8 tests, but *not* for the
        !             7: # other tests. The only way I've found for doing this is to cat this line in
        !             8: # explicitly in the RunPerlTest script.
        !             9: 
        !            10: # use locale;  # With this included, \x0b matches \s!
        !            11: 
        !            12: # Function for turning a string into a string of printing chars. There are
        !            13: # currently problems with UTF-8 strings; this fudges round them.
        !            14: 
        !            15: sub pchars {
        !            16: my($t) = "";
        !            17: 
        !            18: if ($utf8)
        !            19:   {
        !            20:   @p = unpack('U*', $_[0]);
        !            21:   foreach $c (@p)
        !            22:     {
        !            23:     if ($c >= 32 && $c < 127) { $t .= chr $c; }
        !            24:       else { $t .= sprintf("\\x{%02x}", $c); }
        !            25:     }
        !            26:   }
        !            27: 
        !            28: else
        !            29:   {
        !            30:   foreach $c (split(//, $_[0]))
        !            31:     {
        !            32:     if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
        !            33:       else { $t .= sprintf("\\x%02x", ord $c); }
        !            34:     }
        !            35:   }
        !            36: 
        !            37: $t;
        !            38: }
        !            39: 
        !            40: 
        !            41: # Read lines from named file or stdin and write to named file or stdout; lines
        !            42: # consist of a regular expression, in delimiters and optionally followed by
        !            43: # options, followed by a set of test data, terminated by an empty line.
        !            44: 
        !            45: # Sort out the input and output files
        !            46: 
        !            47: if (@ARGV > 0)
        !            48:   {
        !            49:   open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
        !            50:   $infile = "INFILE";
        !            51:   }
        !            52: else { $infile = "STDIN"; }
        !            53: 
        !            54: if (@ARGV > 1)
        !            55:   {
        !            56:   open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
        !            57:   $outfile = "OUTFILE";
        !            58:   }
        !            59: else { $outfile = "STDOUT"; }
        !            60: 
        !            61: printf($outfile "Perl $] Regular Expressions\n\n");
        !            62: 
        !            63: # Main loop
        !            64: 
        !            65: NEXT_RE:
        !            66: for (;;)
        !            67:   {
        !            68:   printf "  re> " if $infile eq "STDIN";
        !            69:   last if ! ($_ = <$infile>);
        !            70:   printf $outfile "$_" if $infile ne "STDIN";
        !            71:   next if ($_ eq "");
        !            72: 
        !            73:   $pattern = $_;
        !            74: 
        !            75:   while ($pattern !~ /^\s*(.).*\1/s)
        !            76:     {
        !            77:     printf "    > " if $infile eq "STDIN";
        !            78:     last if ! ($_ = <$infile>);
        !            79:     printf $outfile "$_" if $infile ne "STDIN";
        !            80:     $pattern .= $_;
        !            81:     }
        !            82: 
        !            83:    chomp($pattern);
        !            84:    $pattern =~ s/\s+$//;
        !            85: 
        !            86:   # The private /+ modifier means "print $' afterwards".
        !            87: 
        !            88:   $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
        !            89: 
        !            90:   # A doubled version is used by pcretest to print remainders after captures
        !            91: 
        !            92:   $pattern =~ s/\+(?=[a-zA-Z]*$)//;
        !            93: 
        !            94:   # Remove /8 from a UTF-8 pattern.
        !            95: 
        !            96:   $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
        !            97: 
        !            98:   # Remove /J from a pattern with duplicate names.
        !            99: 
        !           100:   $pattern =~ s/J(?=[a-zA-Z]*$)//;
        !           101: 
        !           102:   # Remove /K from a pattern (asks pcretest to check MARK data) */
        !           103: 
        !           104:   $pattern =~ s/K(?=[a-zA-Z]*$)//;
        !           105: 
        !           106:   # Remove /W from a pattern (asks pcretest to set PCRE_UCP)
        !           107: 
        !           108:   $pattern =~ s/W(?=[a-zA-Z]*$)//;
        !           109: 
        !           110:   # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
        !           111: 
        !           112:   $pattern =~ s/S(?=[a-zA-Z]*$)//g;
        !           113: 
        !           114:   # Remove /Y from a pattern (asks pcretest to disable PCRE optimization)
        !           115: 
        !           116:   $pattern =~ s/Y(?=[a-zA-Z]*$)//;
        !           117: 
        !           118:   # Check that the pattern is valid
        !           119: 
        !           120:   eval "\$_ =~ ${pattern}";
        !           121:   if ($@)
        !           122:     {
        !           123:     printf $outfile "Error: $@";
        !           124:     next NEXT_RE;
        !           125:     }
        !           126: 
        !           127:   # If the /g modifier is present, we want to put a loop round the matching;
        !           128:   # otherwise just a single "if".
        !           129: 
        !           130:   $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
        !           131: 
        !           132:   # If the pattern is actually the null string, Perl uses the most recently
        !           133:   # executed (and successfully compiled) regex is used instead. This is a
        !           134:   # nasty trap for the unwary! The PCRE test suite does contain null strings
        !           135:   # in places - if they are allowed through here all sorts of weird and
        !           136:   # unexpected effects happen. To avoid this, we replace such patterns with
        !           137:   # a non-null pattern that has the same effect.
        !           138: 
        !           139:   $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
        !           140: 
        !           141:   # Read data lines and test them
        !           142: 
        !           143:   for (;;)
        !           144:     {
        !           145:     printf "data> " if $infile eq "STDIN";
        !           146:     last NEXT_RE if ! ($_ = <$infile>);
        !           147:     chomp;
        !           148:     printf $outfile "$_\n" if $infile ne "STDIN";
        !           149: 
        !           150:     s/\s+$//;  # Remove trailing space
        !           151:     s/^\s+//;  # Remove leading space
        !           152:     s/\\Y//g;  # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
        !           153: 
        !           154:     last if ($_ eq "");
        !           155:     $x = eval "\"$_\"";   # To get escapes processed
        !           156: 
        !           157:     # Empty array for holding results, ensure $REGERROR and $REGMARK are
        !           158:     # unset, then do the matching.
        !           159: 
        !           160:     @subs = ();
        !           161: 
        !           162:     $pushes = "push \@subs,\$&;" .
        !           163:          "push \@subs,\$1;" .
        !           164:          "push \@subs,\$2;" .
        !           165:          "push \@subs,\$3;" .
        !           166:          "push \@subs,\$4;" .
        !           167:          "push \@subs,\$5;" .
        !           168:          "push \@subs,\$6;" .
        !           169:          "push \@subs,\$7;" .
        !           170:          "push \@subs,\$8;" .
        !           171:          "push \@subs,\$9;" .
        !           172:          "push \@subs,\$10;" .
        !           173:          "push \@subs,\$11;" .
        !           174:          "push \@subs,\$12;" .
        !           175:          "push \@subs,\$13;" .
        !           176:          "push \@subs,\$14;" .
        !           177:          "push \@subs,\$15;" .
        !           178:          "push \@subs,\$16;" .
        !           179:          "push \@subs,\$'; }";
        !           180: 
        !           181:     undef $REGERROR;
        !           182:     undef $REGMARK;
        !           183: 
        !           184:     eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
        !           185: 
        !           186:     if ($@)
        !           187:       {
        !           188:       printf $outfile "Error: $@\n";
        !           189:       next NEXT_RE;
        !           190:       }
        !           191:     elsif (scalar(@subs) == 0)
        !           192:       {
        !           193:       printf $outfile "No match";
        !           194:       if (defined $REGERROR && $REGERROR != 1)
        !           195:         { print $outfile (", mark = $REGERROR"); }
        !           196:       printf $outfile "\n";
        !           197:       }
        !           198:     else
        !           199:       {
        !           200:       while (scalar(@subs) != 0)
        !           201:         {
        !           202:         printf $outfile (" 0: %s\n", &pchars($subs[0]));
        !           203:         printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
        !           204:         $last_printed = 0;
        !           205:         for ($i = 1; $i <= 16; $i++)
        !           206:           {
        !           207:           if (defined $subs[$i])
        !           208:             {
        !           209:             while ($last_printed++ < $i-1)
        !           210:               { printf $outfile ("%2d: <unset>\n", $last_printed); }
        !           211:             printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
        !           212:             $last_printed = $i;
        !           213:             }
        !           214:           }
        !           215:         splice(@subs, 0, 18);
        !           216:         }
        !           217:       if (defined $REGMARK && $REGMARK != 1)
        !           218:         { print $outfile ("MK: $REGMARK\n"); }
        !           219:       }
        !           220:     }
        !           221:   }
        !           222: 
        !           223: # printf $outfile "\n";
        !           224: 
        !           225: # End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>